| ... | ... |
@@ -65,48 +65,52 @@ |
| 65 | 65 |
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns that contain a backslash are written as raw strings so the
# regex escapes (\s, \w, \{ ...) reach the re module unchanged instead of
# relying on Python passing unknown string escapes through.
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile('[{}]')
capitalize_rex = re.compile(r'({\w*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile('[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile('([{}"])', re.I)

# "field = data" lines: group 1 is the field name, group 2 the data
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# a literal \url{...} command; group 1 is the text between the braces
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
|
| 89 | 93 |
|
| 90 | 94 |
# |
| 91 | 95 |
# return the string parameter with \url{...} commands turned into HTML links |
| 92 | 96 |
# |
| 93 | 97 |
def transformurls(str):
    r"""Return the text with each \url{target} rewritten as an HTML anchor.

    The captured target is used both as the href attribute and as the
    visible link text.  Text that url_rex does not match is left as is.
    """
    anchor_template = r'<a href="\1">\1</a>'
    linked_text = url_rex.sub(anchor_template, str)
    return linked_text
| 95 | 99 |
|
| 96 | 100 |
# |
| 97 | 101 |
# return the string parameter without braces |
| 98 | 102 |
# |
| 99 | 103 |
def removebraces(str):
    """Return the text with every '{' and '}' character deleted."""
    replacement = ''
    without_braces = rembraces_rex.sub(replacement, str)
    return without_braces
|
| 101 | 105 |
|
| 102 | 106 |
# |
| 103 | 107 |
# latex-specific replacements |
| 104 | 108 |
# (do this after braces were removed) |
| 105 | 109 |
# |
| 106 | 110 |
def latexreplacements(line): |
| 107 | 111 |
line = string.replace(line, '~', ' ') |
| 108 | 112 |
line = string.replace(line, '\\\'a', 'á') |
| 109 | 113 |
line = string.replace(line, '\\"a', 'ä') |
| 110 | 114 |
line = string.replace(line, '\\\'e', 'é') |
| 111 | 115 |
line = string.replace(line, '\\"e', 'ë') |
| 112 | 116 |
line = string.replace(line, '\\\'i', 'í') |
| ... | ... |
@@ -263,49 +267,49 @@ |
| 263 | 267 |
endtype_rex = re.compile('}\s*$')
|
| 264 | 268 |
endtag_rex = re.compile('^\s*}\s*$')
|
| 265 | 269 |
|
| 266 | 270 |
bracefield_rex = re.compile('\s*(\w*)\s*=\s*(.*)')
|
| 267 | 271 |
bracedata_rex = re.compile('\s*(\w*)\s*=\s*{(.*)},?')
|
| 268 | 272 |
|
| 269 | 273 |
quotefield_rex = re.compile('\s*(\w*)\s*=\s*(.*)')
|
| 270 | 274 |
quotedata_rex = re.compile('\s*(\w*)\s*=\s*"(.*)",?')
|
| 271 | 275 |
|
| 272 | 276 |
for line in filecont_source: |
| 273 | 277 |
line = line[:-1] |
| 274 | 278 |
|
| 275 | 279 |
# encode character entities |
| 276 | 280 |
line = string.replace(line, '&', '&') |
| 277 | 281 |
line = string.replace(line, '<', '<') |
| 278 | 282 |
line = string.replace(line, '>', '>') |
| 279 | 283 |
|
| 280 | 284 |
# start entry: publication type (store for later use) |
| 281 | 285 |
if pubtype_rex.match(line): |
| 282 | 286 |
# want @<alphanumeric chars><spaces>{<spaces><any chars>,
|
| 283 | 287 |
entrycont = {}
|
| 284 | 288 |
entry = [] |
| 285 | 289 |
entrytype = pubtype_rex.sub('\g<1>',line)
|
| 286 | 290 |
entrytype = string.lower(entrytype) |
| 287 |
|
|
| 291 |
entryid = pubtype_rex.sub('\g<2>', line)
|
|
| 288 | 292 |
|
| 289 | 293 |
# end entry if just a } |
| 290 | 294 |
elif endtype_rex.match(line): |
| 291 | 295 |
# generate doxygen code for the entry |
| 292 | 296 |
|
| 293 | 297 |
# entry-type related formatting |
| 294 | 298 |
if entrytype in ('book', 'inbook'):
|
| 295 | 299 |
entrycont['title'] = '<em>' + entrycont['title'] + '</em>' |
| 296 | 300 |
if not entrycont.has_key('author'):
|
| 297 | 301 |
entrycont['author'] = entrycont['editor'] |
| 298 | 302 |
entrycont['author']['text'] += ', editors' |
| 299 | 303 |
elif entrytype == 'article': |
| 300 | 304 |
entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>' |
| 301 | 305 |
elif entrytype in ('inproceedings', 'incollection', 'conference'):
|
| 302 | 306 |
entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>' |
| 303 | 307 |
elif entrytype == 'techreport': |
| 304 | 308 |
if not entrycont.has_key('type'):
|
| 305 | 309 |
entrycont['type'] = 'Technical report' |
| 306 | 310 |
elif entrytype == 'mastersthesis': |
| 307 | 311 |
entrycont['type'] = 'Master\'s thesis' |
| 308 | 312 |
elif entrytype == 'phdthesis': |
| 309 | 313 |
entrycont['type'] = 'PhD thesis' |
| 310 | 314 |
|
| 311 | 315 |
for eline in entrycont: |
| ... | ... |
@@ -358,48 +362,49 @@ |
| 358 | 362 |
else: |
| 359 | 363 |
entry.append(entrycont['year'] + '.') |
| 360 | 364 |
if entrycont.has_key('note') and (entrycont['note'] != ''):
|
| 361 | 365 |
entry.append(entrycont['note'] + '.') |
| 362 | 366 |
|
| 363 | 367 |
# generate keys for sorting and for the output |
| 364 | 368 |
sortkey = '' |
| 365 | 369 |
bibkey = '' |
| 366 | 370 |
if entrycont.has_key('author'):
|
| 367 | 371 |
for author in entrycont['author']['list']: |
| 368 | 372 |
sortkey += copychars(author, author.rfind(' ')+1, len(author))
|
| 369 | 373 |
bibkey = entrycont['author']['abbrev'] |
| 370 | 374 |
else: |
| 371 | 375 |
bibkey = 'x' |
| 372 | 376 |
if entrycont.has_key('year'):
|
| 373 | 377 |
sortkey += entrycont['year'] |
| 374 | 378 |
bibkey += entrycont['year'][-2:] |
| 375 | 379 |
if entrycont.has_key('title'):
|
| 376 | 380 |
sortkey += entrycont['title'] |
| 377 | 381 |
if entrycont.has_key('key'):
|
| 378 | 382 |
sortkey = entrycont['key'] + sortkey |
| 379 | 383 |
bibkey = entrycont['key'] |
| 380 | 384 |
entry.insert(0, sortkey) |
| 381 | 385 |
entry.insert(1, bibkey) |
| 386 |
entry.insert(2, entryid) |
|
| 382 | 387 |
|
| 383 | 388 |
# add the entry to the file contents |
| 384 | 389 |
filecont.append(entry) |
| 385 | 390 |
|
| 386 | 391 |
else: |
| 387 | 392 |
# field, publication info |
| 388 | 393 |
field = '' |
| 389 | 394 |
data = '' |
| 390 | 395 |
|
| 391 | 396 |
# field = {data} entries
|
| 392 | 397 |
if bracedata_rex.match(line): |
| 393 | 398 |
field = bracefield_rex.sub('\g<1>', line)
|
| 394 | 399 |
field = string.lower(field) |
| 395 | 400 |
data = bracedata_rex.sub('\g<2>', line)
|
| 396 | 401 |
|
| 397 | 402 |
# field = "data" entries |
| 398 | 403 |
elif quotedata_rex.match(line): |
| 399 | 404 |
field = quotefield_rex.sub('\g<1>', line)
|
| 400 | 405 |
field = string.lower(field) |
| 401 | 406 |
data = quotedata_rex.sub('\g<2>', line)
|
| 402 | 407 |
|
| 403 | 408 |
# field = data entries |
| 404 | 409 |
elif data_rex.match(line): |
| 405 | 410 |
field = field_rex.sub('\g<1>', line)
|
| ... | ... |
@@ -418,62 +423,61 @@ |
| 418 | 423 |
line = latexreplacements(line) |
| 419 | 424 |
entrycont[field] = line |
| 420 | 425 |
|
| 421 | 426 |
|
| 422 | 427 |
# sort entries |
| 423 | 428 |
filecont.sort(entry_cmp) |
| 424 | 429 |
|
| 425 | 430 |
# count the bibtex keys |
| 426 | 431 |
keytable = {}
|
| 427 | 432 |
counttable = {}
|
| 428 | 433 |
for entry in filecont: |
| 429 | 434 |
bibkey = entry[1] |
| 430 | 435 |
if not keytable.has_key(bibkey): |
| 431 | 436 |
keytable[bibkey] = 1 |
| 432 | 437 |
else: |
| 433 | 438 |
keytable[bibkey] += 1 |
| 434 | 439 |
|
| 435 | 440 |
for bibkey in keytable.keys(): |
| 436 | 441 |
counttable[bibkey] = 0 |
| 437 | 442 |
|
| 438 | 443 |
# generate output |
| 439 | 444 |
for entry in filecont: |
| 440 | 445 |
# generate output key from the bibtex key |
| 441 | 446 |
bibkey = entry[1] |
| 447 |
entryid = entry[2] |
|
| 442 | 448 |
if keytable[bibkey] == 1: |
| 443 | 449 |
outkey = bibkey |
| 444 | 450 |
else: |
| 445 | 451 |
outkey = bibkey + chr(97 + counttable[bibkey]) |
| 446 | 452 |
counttable[bibkey] += 1 |
| 447 | 453 |
|
| 448 | 454 |
# append the entry code to the output |
| 449 |
file.append('<tr valign="top">\n' + \
|
|
| 450 |
'<td>[' + outkey + ']</td>') |
|
| 451 |
file.append('<td>')
|
|
| 452 |
file.append('\\anchor ' + outkey)
|
|
| 453 |
|
|
| 455 |
file.append('\\section ' + entryid + ' [' + outkey + ']')
|
|
| 456 |
file.append('<div style="' + divstyle + '">')
|
|
| 457 |
for line in entry[3:]: |
|
| 454 | 458 |
file.append(line) |
| 455 |
file.append('</
|
|
| 459 |
file.append('</div>')
|
|
| 456 | 460 |
file.append('')
|
| 457 | 461 |
|
| 458 | 462 |
return file |
| 459 | 463 |
|
| 460 | 464 |
|
| 461 | 465 |
# |
| 462 | 466 |
# return 1 iff abbr is in line but not inside braces or quotes |
| 463 | 467 |
# assumes that abbr appears only once on the line (out of braces and quotes) |
| 464 | 468 |
# |
| 465 | 469 |
def verify_out_of_braces(line, abbr): |
| 466 | 470 |
|
| 467 | 471 |
phrase_split = delimiter_rex.split(line) |
| 468 | 472 |
|
| 469 | 473 |
abbr_rex = re.compile( '\\b' + abbr + '\\b', re.I) |
| 470 | 474 |
|
| 471 | 475 |
open_brace = 0 |
| 472 | 476 |
open_quote = 0 |
| 473 | 477 |
|
| 474 | 478 |
for phrase in phrase_split: |
| 475 | 479 |
if phrase == "{":
|
| 476 | 480 |
open_brace = open_brace + 1 |
| 477 | 481 |
elif phrase == "}": |
| 478 | 482 |
open_brace = open_brace - 1 |
| 479 | 483 |
elif phrase == '"': |
| ... | ... |
@@ -759,48 +763,44 @@ |
| 759 | 763 |
filecont[i] = line + '\n' |
| 760 | 764 |
i = i+1 |
| 761 | 765 |
|
| 762 | 766 |
# get rid of the extra stuff at the end of the array |
| 763 | 767 |
# (The extra stuff are duplicates that are in the array because |
| 764 | 768 |
# blank lines were removed.) |
| 765 | 769 |
length = len( filecont) |
| 766 | 770 |
filecont[length-j:length] = [] |
| 767 | 771 |
|
| 768 | 772 |
return filecont |
| 769 | 773 |
|
| 770 | 774 |
|
| 771 | 775 |
def filehandler(filepath):
    """Read a BibTeX file and print the generated references page.

    The lines of the file are passed through bibtexwasher() and then
    bibtexdecoder(); the resulting lines are printed to standard output
    between '/**' and '*/', preceded by a '\\page references References'
    line.

    Parameters:
        filepath -- path of the BibTeX file to read.

    Returns None.  If the file cannot be read, a message is printed and
    nothing else is written.
    """
    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the handle even when reading fails part-way
            fd.close()
    except IOError:
        print('Could not open file: %s' % filepath)
        # Stop here: without the file contents there is nothing to convert
        # (continuing would fail on the unbound filecont_source).
        return

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    print('/**')
    print('\\page references References')

    for line in outdata:
        print(line)
    print('*/')
| 790 | 790 |
|
| 791 | 791 |
|
| 792 | 792 |
# main program |
| 793 | 793 |
|
| 794 | 794 |
def main():
    """Command-line entry point: convert the file named by the first argument.

    When no argument is given, prints "No input file" and exits with
    status 1 so that calling scripts can detect the usage error.
    """
    import sys
    if not sys.argv[1:]:
        print("No input file")
        # non-zero status: a missing argument is a failure, not a success
        sys.exit(1)
    filehandler(sys.argv[1])


if __name__ == "__main__":
    main()
| 804 | 804 |
|
| 805 | 805 |
|
| 806 | 806 |
# end python script |
0 comments (0 inline)