gravatar
kpeter (Peter Kovacs)
kpeter@inf.elte.hu
Improve bib2dox.py using \section for entries (#184)
0 1 0
default
1 file changed with 11 insertions and 11 deletions:
↑ Collapse diff ↑
Show white space 48 line context
... ...
@@ -65,48 +65,52 @@
65 65
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# Every pattern is written as a raw string so that regex escapes such as
# \s, \w and \{ reach the re module verbatim instead of depending on
# Python leaving unrecognized string escapes untouched.
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'({\w*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = value" lines: group 1 is the field name, group 2 the value
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# matches a literal \url{...}; group 1 is the text between the braces
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
89 93

	
90 94
#
91 95
# return the string parameter with \url{...} commands turned into HTML links
92 96
#
93 97
def transformurls(str):
    # Each match of url_rex is replaced by an HTML anchor; the captured
    # URL (group 1) serves as both the href target and the link text.
    anchor_template = r'<a href="\1">\1</a>'
    linked = url_rex.sub(anchor_template, str)
    return linked
95 99

	
96 100
#
97 101
# return the string parameter without braces
98 102
#
99 103
def removebraces(str):
    # Delete everything rembraces_rex matches (the '{' and '}' characters)
    # and hand back what is left of the string.
    stripped = rembraces_rex.sub('', str)
    return stripped
101 105

	
102 106
#
103 107
# latex-specific replacements
104 108
# (do this after braces were removed)
105 109
#
106 110
def latexreplacements(line):
107 111
    line = string.replace(line, '~', '&nbsp;')
108 112
    line = string.replace(line, '\\\'a', '&aacute;')
109 113
    line = string.replace(line, '\\"a', '&auml;')
110 114
    line = string.replace(line, '\\\'e', '&eacute;')
111 115
    line = string.replace(line, '\\"e', '&euml;')
112 116
    line = string.replace(line, '\\\'i', '&iacute;')
... ...
@@ -263,49 +267,49 @@
263 267
    endtype_rex = re.compile('}\s*$')
264 268
    endtag_rex = re.compile('^\s*}\s*$')
265 269

	
266 270
    bracefield_rex = re.compile('\s*(\w*)\s*=\s*(.*)')
267 271
    bracedata_rex = re.compile('\s*(\w*)\s*=\s*{(.*)},?')
268 272

	
269 273
    quotefield_rex = re.compile('\s*(\w*)\s*=\s*(.*)')
270 274
    quotedata_rex = re.compile('\s*(\w*)\s*=\s*"(.*)",?')
271 275

	
272 276
    for line in filecont_source:
273 277
        line = line[:-1]
274 278

	
275 279
        # encode character entities
276 280
        line = string.replace(line, '&', '&amp;')
277 281
        line = string.replace(line, '<', '&lt;')
278 282
        line = string.replace(line, '>', '&gt;')
279 283

	
280 284
        # start entry: publication type (store for later use)
281 285
        if pubtype_rex.match(line):
282 286
        # want @<alphanumeric chars><spaces>{<spaces><any chars>,
283 287
            entrycont = {}
284 288
            entry = []
285 289
            entrytype = pubtype_rex.sub('\g<1>',line)
286 290
            entrytype = string.lower(entrytype)
287
            # entryid   = pubtype_rex.sub('\g<2>', line)
291
            entryid   = pubtype_rex.sub('\g<2>', line)
288 292

	
289 293
        # end entry if just a }
290 294
        elif endtype_rex.match(line):
291 295
            # generate doxygen code for the entry
292 296

	
293 297
            # entry type related formatting
294 298
            if entrytype in ('book', 'inbook'):
295 299
                entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
296 300
                if not entrycont.has_key('author'):
297 301
                    entrycont['author'] = entrycont['editor']
298 302
                    entrycont['author']['text'] += ', editors'
299 303
            elif entrytype == 'article':
300 304
                entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
301 305
            elif entrytype in ('inproceedings', 'incollection', 'conference'):
302 306
                entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
303 307
            elif entrytype == 'techreport':
304 308
                if not entrycont.has_key('type'):
305 309
                    entrycont['type'] = 'Technical report'
306 310
            elif entrytype == 'mastersthesis':
307 311
                entrycont['type'] = 'Master\'s thesis'
308 312
            elif entrytype == 'phdthesis':
309 313
                entrycont['type'] = 'PhD thesis'
310 314

	
311 315
            for eline in entrycont:
... ...
@@ -358,48 +362,49 @@
358 362
                else:
359 363
                    entry.append(entrycont['year'] + '.')
360 364
            if entrycont.has_key('note') and (entrycont['note'] != ''):
361 365
                entry.append(entrycont['note'] + '.')
362 366

	
363 367
            # generate keys for sorting and for the output
364 368
            sortkey = ''
365 369
            bibkey = ''
366 370
            if entrycont.has_key('author'):
367 371
                for author in entrycont['author']['list']:
368 372
                    sortkey += copychars(author, author.rfind(' ')+1, len(author))
369 373
                bibkey = entrycont['author']['abbrev']
370 374
            else:
371 375
                bibkey = 'x'
372 376
            if entrycont.has_key('year'):
373 377
                sortkey += entrycont['year']
374 378
                bibkey += entrycont['year'][-2:]
375 379
            if entrycont.has_key('title'):
376 380
                sortkey += entrycont['title']
377 381
            if entrycont.has_key('key'):
378 382
                sortkey = entrycont['key'] + sortkey
379 383
                bibkey = entrycont['key']
380 384
            entry.insert(0, sortkey)
381 385
            entry.insert(1, bibkey)
386
            entry.insert(2, entryid)
382 387
           
383 388
            # add the entry to the file contents
384 389
            filecont.append(entry)
385 390

	
386 391
        else:
387 392
            # field, publication info
388 393
            field = ''
389 394
            data = ''
390 395
            
391 396
            # field = {data} entries
392 397
            if bracedata_rex.match(line):
393 398
                field = bracefield_rex.sub('\g<1>', line)
394 399
                field = string.lower(field)
395 400
                data =  bracedata_rex.sub('\g<2>', line)
396 401

	
397 402
            # field = "data" entries
398 403
            elif quotedata_rex.match(line):
399 404
                field = quotefield_rex.sub('\g<1>', line)
400 405
                field = string.lower(field)
401 406
                data =  quotedata_rex.sub('\g<2>', line)
402 407

	
403 408
            # field = data entries
404 409
            elif data_rex.match(line):
405 410
                field = field_rex.sub('\g<1>', line)
... ...
@@ -418,62 +423,61 @@
418 423
                line = latexreplacements(line)
419 424
                entrycont[field] = line
420 425

	
421 426

	
422 427
    # sort entries
423 428
    filecont.sort(entry_cmp)
424 429
    
425 430
    # count the bibtex keys
426 431
    keytable = {}
427 432
    counttable = {}
428 433
    for entry in filecont:
429 434
        bibkey = entry[1]
430 435
        if not keytable.has_key(bibkey):
431 436
            keytable[bibkey] = 1
432 437
        else:
433 438
            keytable[bibkey] += 1
434 439

	
435 440
    for bibkey in keytable.keys():
436 441
        counttable[bibkey] = 0
437 442
    
438 443
    # generate output
439 444
    for entry in filecont:
440 445
        # generate output key from the bibtex key
441 446
        bibkey = entry[1]
447
        entryid = entry[2]
442 448
        if keytable[bibkey] == 1:
443 449
            outkey = bibkey
444 450
        else:
445 451
            outkey = bibkey + chr(97 + counttable[bibkey])
446 452
        counttable[bibkey] += 1
447 453
        
448 454
        # append the entry code to the output
449
        file.append('<tr valign="top">\n' + \
450
                    '<td>[' + outkey + ']</td>')
451
        file.append('<td>')
452
        file.append('\\anchor ' + outkey)
453
        for line in entry[2:]:
455
        file.append('\\section ' + entryid + ' [' + outkey + ']')
456
        file.append('<div style="' + divstyle + '">')
457
        for line in entry[3:]:
454 458
            file.append(line)
455
        file.append('</td>\n</tr>')
459
        file.append('</div>')
456 460
        file.append('')
457 461

	
458 462
    return file
459 463

	
460 464

	
461 465
#
462 466
# return 1 iff abbr is in line but not inside braces or quotes
463 467
# assumes that abbr appears only once on the line (out of braces and quotes)
464 468
#
465 469
def verify_out_of_braces(line, abbr):
466 470

	
467 471
    phrase_split = delimiter_rex.split(line)
468 472

	
469 473
    abbr_rex = re.compile( '\\b' + abbr + '\\b', re.I)
470 474

	
471 475
    open_brace = 0
472 476
    open_quote = 0
473 477

	
474 478
    for phrase in phrase_split:
475 479
        if phrase == "{":
476 480
            open_brace = open_brace + 1
477 481
        elif phrase == "}":
478 482
            open_brace = open_brace - 1
479 483
        elif phrase == '"':
... ...
@@ -759,48 +763,44 @@
759 763
        filecont[i] = line + '\n'
760 764
        i = i+1
761 765

	
762 766
    # get rid of the extra stuff at the end of the array
763 767
    # (The extra stuff are duplicates that are in the array because
764 768
    # blank lines were removed.)
765 769
    length = len( filecont)
766 770
    filecont[length-j:length] = []
767 771

	
768 772
    return filecont
769 773

	
770 774

	
771 775
def filehandler(filepath):
772 776
    try:
773 777
        fd = open(filepath, 'r')
774 778
        filecont_source = fd.readlines()
775 779
        fd.close()
776 780
    except:
777 781
        print 'Could not open file:', filepath
778 782
    washeddata = bibtexwasher(filecont_source)
779 783
    outdata = bibtexdecoder(washeddata)
780 784
    print '/**'
781 785
    print '\page references References'
782 786
    print
783
    print '<table border="0" cellspacing="5px" width="100%">'
784
    print
785 787
    for line in outdata:
786 788
        print line
787
    print '</table>'
788
    print
789 789
    print '*/'
790 790

	
791 791

	
792 792
# main program
793 793

	
794 794
def main():
795 795
    import sys
796 796
    if sys.argv[1:]:
797 797
        filepath = sys.argv[1]
798 798
    else:
799 799
        print "No input file"
800 800
        sys.exit()
801 801
    filehandler(filepath)
802 802

	
803 803
if __name__ == "__main__": main()
804 804

	
805 805

	
806 806
# end python script
0 comments (0 inline)