,
entrycont = {}
entry = []
entrytype = pubtype_rex.sub('\g<1>',line)
entrytype = string.lower(entrytype)
entryid = pubtype_rex.sub('\g<2>', line)
# end entry if just a }
elif endtype_rex.match(line):
# generate doxygen code for the entry
# entry type related formatting
if entrytype in ('book', 'inbook'):
entrycont['title'] = '' + entrycont['title'] + ''
if not entrycont.has_key('author'):
entrycont['author'] = entrycont['editor']
entrycont['author']['text'] += ', editors'
elif entrytype == 'article':
entrycont['journal'] = '' + entrycont['journal'] + ''
elif entrytype in ('inproceedings', 'incollection', 'conference'):
entrycont['booktitle'] = '' + entrycont['booktitle'] + ''
elif entrytype == 'techreport':
if not entrycont.has_key('type'):
entrycont['type'] = 'Technical report'
elif entrytype == 'mastersthesis':
entrycont['type'] = 'Master\'s thesis'
elif entrytype == 'phdthesis':
entrycont['type'] = 'PhD thesis'
for eline in entrycont:
if eline != '':
eline = latexreplacements(eline)
if entrycont.has_key('pages') and (entrycont['pages'] != ''):
entrycont['pages'] = string.replace(entrycont['pages'], '--', '-')
if entrycont.has_key('author') and (entrycont['author'] != ''):
entry.append(entrycont['author']['text'] + '.')
if entrycont.has_key('title') and (entrycont['title'] != ''):
entry.append(entrycont['title'] + '.')
if entrycont.has_key('journal') and (entrycont['journal'] != ''):
entry.append(entrycont['journal'] + ',')
if entrycont.has_key('booktitle') and (entrycont['booktitle'] != ''):
entry.append('In ' + entrycont['booktitle'] + ',')
if entrycont.has_key('type') and (entrycont['type'] != ''):
eline = entrycont['type']
if entrycont.has_key('number') and (entrycont['number'] != ''):
eline += ' ' + entrycont['number']
eline += ','
entry.append(eline)
if entrycont.has_key('institution') and (entrycont['institution'] != ''):
entry.append(entrycont['institution'] + ',')
if entrycont.has_key('publisher') and (entrycont['publisher'] != ''):
entry.append(entrycont['publisher'] + ',')
if entrycont.has_key('school') and (entrycont['school'] != ''):
entry.append(entrycont['school'] + ',')
if entrycont.has_key('address') and (entrycont['address'] != ''):
entry.append(entrycont['address'] + ',')
if entrycont.has_key('edition') and (entrycont['edition'] != ''):
entry.append(entrycont['edition'] + ' edition,')
if entrycont.has_key('howpublished') and (entrycont['howpublished'] != ''):
entry.append(entrycont['howpublished'] + ',')
if entrycont.has_key('volume') and (entrycont['volume'] != ''):
eline = entrycont['volume']
if entrycont.has_key('number') and (entrycont['number'] != ''):
eline += '(' + entrycont['number'] + ')'
if entrycont.has_key('pages') and (entrycont['pages'] != ''):
eline += ':' + entrycont['pages']
eline += ','
entry.append(eline)
else:
if entrycont.has_key('pages') and (entrycont['pages'] != ''):
entry.append('pages ' + entrycont['pages'] + ',')
if entrycont.has_key('year') and (entrycont['year'] != ''):
if entrycont.has_key('month') and (entrycont['month'] != ''):
entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
else:
entry.append(entrycont['year'] + '.')
if entrycont.has_key('note') and (entrycont['note'] != ''):
entry.append(entrycont['note'] + '.')
# generate keys for sorting and for the output
sortkey = ''
bibkey = ''
if entrycont.has_key('author'):
for author in entrycont['author']['list']:
sortkey += copychars(author, author.rfind(' ')+1, len(author))
bibkey = entrycont['author']['abbrev']
else:
bibkey = 'x'
if entrycont.has_key('year'):
sortkey += entrycont['year']
bibkey += entrycont['year'][-2:]
if entrycont.has_key('title'):
sortkey += entrycont['title']
if entrycont.has_key('key'):
sortkey = entrycont['key'] + sortkey
bibkey = entrycont['key']
entry.insert(0, sortkey)
entry.insert(1, bibkey)
entry.insert(2, entryid)
# add the entry to the file contents
filecont.append(entry)
else:
# otherwise the line holds a field of the publication info
field = ''
data = ''
# field = {data} entries
if bracedata_rex.match(line):
field = bracefield_rex.sub('\g<1>', line)
field = string.lower(field)
data = bracedata_rex.sub('\g<2>', line)
# field = "data" entries
elif quotedata_rex.match(line):
field = quotefield_rex.sub('\g<1>', line)
field = string.lower(field)
data = quotedata_rex.sub('\g<2>', line)
# field = data entries
elif data_rex.match(line):
field = field_rex.sub('\g<1>', line)
field = string.lower(field)
data = data_rex.sub('\g<2>', line)
if field in ('author', 'editor'):
entrycont[field] = bibtexauthor(data)
line = ''
elif field == 'title':
line = bibtextitle(data, entrytype)
elif field != '':
line = removebraces(transformurls(data.strip()))
if line != '':
line = latexreplacements(line)
entrycont[field] = line
# sort entries
filecont.sort(entry_cmp)
# count the bibtex keys
keytable = {}
counttable = {}
for entry in filecont:
bibkey = entry[1]
if not keytable.has_key(bibkey):
keytable[bibkey] = 1
else:
keytable[bibkey] += 1
for bibkey in keytable.keys():
counttable[bibkey] = 0
# generate output
for entry in filecont:
# generate output key from the bibtex key
bibkey = entry[1]
entryid = entry[2]
if keytable[bibkey] == 1:
outkey = bibkey
else:
outkey = bibkey + chr(97 + counttable[bibkey])
counttable[bibkey] += 1
# append the entry code to the output
file.append('\\section ' + entryid + ' [' + outkey + ']')
file.append('')
for line in entry[3:]:
file.append(line)
file.append('\n')
file.append('')
return file
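#
# Each decoded entry is emitted as a block of the form
#   \section <bibtex id> [<citation key>]
#   <formatted reference lines>
# followed by a blank line; filehandler below wraps the whole output in a
# doxygen \page block.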
#
# return 1 iff abbr is in line but not inside braces or quotes
# assumes that abbr appears only once on the line (outside braces and quotes)
#
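# Illustrative examples (assuming delimiter_rex, defined elsewhere in this
# script, splits on braces and quotes):
#   verify_out_of_braces('month = jan,', 'jan')    returns 1
#   verify_out_of_braces('month = "jan",', 'jan')  returns 0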
def verify_out_of_braces(line, abbr):
phrase_split = delimiter_rex.split(line)
abbr_rex = re.compile( '\\b' + abbr + '\\b', re.I)
open_brace = 0
open_quote = 0
for phrase in phrase_split:
if phrase == "{":
open_brace = open_brace + 1
elif phrase == "}":
open_brace = open_brace - 1
elif phrase == '"':
if open_quote == 1:
open_quote = 0
else:
open_quote = 1
elif abbr_rex.search(phrase):
if open_brace == 0 and open_quote == 0:
return 1
return 0
#
# a line in the form phrase1 # phrase2 # ... # phrasen
# is returned as phrase1 phrase2 ... phrasen
# with the correct punctuation
# Bug: Doesn't always work with multiple abbreviations plugged in
#
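# Illustrative example of the input form handled here (assuming
# concatsplit_rex, defined elsewhere, splits on BibTeX's '#' concatenation
# operator):
#   month = "10~" # jan,
# The '#'-separated pieces are merged into a single value on one line.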
def concat_line(line):
# only look at part after equals
field = field_rex.sub('\g<1>',line)
rest = field_rex.sub('\g<2>',line)
concat_line = field + ' ='
pound_split = concatsplit_rex.split(rest)
phrase_count = 0
length = len(pound_split)
for phrase in pound_split:
phrase = phrase.strip()
if phrase_count != 0:
if phrase.startswith('"') or phrase.startswith('{'):
phrase = phrase[1:]
if phrase_count != length-1:
if phrase.endswith('"') or phrase.endswith('}'):
phrase = phrase[:-1]
else:
if phrase.endswith('"'):
phrase = phrase[:-1]
phrase = phrase + "}"
elif phrase.endswith('",'):
phrase = phrase[:-2]
phrase = phrase + "},"
# if phrase did have \#, add the \# back
if phrase.endswith('\\'):
phrase = phrase + "#"
concat_line = concat_line + ' ' + phrase
phrase_count = phrase_count + 1
return concat_line
#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
#
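# Illustrative examples: the builtin month abbreviations are expanded, so a
# washed line such as
#   month = jan,
# becomes
#   month = January,
# and a user definition like @string{lncs = "Lecture Notes in Computer
# Science"} makes later bare uses of lncs (outside braces and quotes) get
# replaced by its definition.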
def bibtex_replace_abbreviations(filecont_source):
filecont = filecont_source.splitlines()
# These are defined in bibtex, so we'll define them too
abbr_list = ['jan','feb','mar','apr','may','jun',
'jul','aug','sep','oct','nov','dec']
value_list = ['January','February','March','April',
'May','June','July','August','September',
'October','November','December']
abbr_rex = []
total_abbr_count = 0
front = '\\b'
back = '(,?)\\b'
for x in abbr_list:
abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
total_abbr_count = total_abbr_count + 1
abbrdef_rex = re.compile('\s*@string\s*{\s*('+ valid_name_chars +'*)\s*=(.*)',
re.I)
comment_rex = re.compile('@comment\s*{',re.I)
preamble_rex = re.compile('@preamble\s*{',re.I)
waiting_for_end_string = 0
i = 0
filecont2 = ''
for line in filecont:
if line == ' ' or line == '':
continue
if waiting_for_end_string:
if re.search('}',line):
waiting_for_end_string = 0
continue
if abbrdef_rex.search(line):
abbr = abbrdef_rex.sub('\g<1>', line)
if abbr_list.count(abbr) == 0:
val = abbrdef_rex.sub('\g<2>', line)
abbr_list.append(abbr)
value_list.append(string.strip(val))
abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
total_abbr_count = total_abbr_count + 1
waiting_for_end_string = 1
continue
if comment_rex.search(line):
waiting_for_end_string = 1
continue
if preamble_rex.search(line):
waiting_for_end_string = 1
continue
# replace subsequent abbreviations with the value
abbr_count = 0
for x in abbr_list:
if abbr_rex[abbr_count].search(line):
if verify_out_of_braces(line,abbr_list[abbr_count]) == 1:
line = abbr_rex[abbr_count].sub( value_list[abbr_count] + '\g<1>', line)
# Check for # concatenations
if concatsplit_rex.search(line):
line = concat_line(line)
abbr_count = abbr_count + 1
filecont2 = filecont2 + line + '\n'
i = i+1
# Do one final pass over file
# make sure we didn't end up with {" or "} after the substitution
filecont2 = filecont2.replace('{"','{{')
filecont2 = filecont2.replace('"}','}}')
afterquotevalue_rex = re.compile('"\s*,\s*')
afterbrace_rex = re.compile('"\s*}')
afterbracevalue_rex = re.compile('(=\s*{[^=]*)},\s*')
# add new lines to data that changed because of abbreviation substitutions
filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
filecont2 = afterbrace_rex.sub('"\n}', filecont2)
filecont2 = afterbracevalue_rex.sub('\g<1>},\n', filecont2)
return filecont2
#
# convert @type( ... ) to @type{ ... }
#
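# Illustrative example:
#   @Article( smith97, ... )   becomes   @Article{ smith97, ... }
# Parentheses nested inside the entry are left alone; only the parenthesis
# that opens the entry and its matching closing one are rewritten.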
def no_outer_parens(filecont):
# do checking for open parens
# will convert to braces
paren_split = re.split('([(){}])',filecont)
open_paren_count = 0
open_type = 0
look_next = 0
# rebuild filecont
filecont = ''
at_rex = re.compile('@\w*')
for phrase in paren_split:
if look_next == 1:
if phrase == '(':
phrase = '{'
open_paren_count = open_paren_count + 1
else:
open_type = 0
look_next = 0
if phrase == '(':
open_paren_count = open_paren_count + 1
elif phrase == ')':
open_paren_count = open_paren_count - 1
if open_type == 1 and open_paren_count == 0:
phrase = '}'
open_type = 0
elif at_rex.search( phrase ):
open_type = 1
look_next = 1
filecont = filecont + phrase
return filecont
#
# collapse all whitespace runs into single spaces and
# format the bibtex file into a usable form
#
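# Illustrative example: an entry arriving as one line such as
#   @Article{smith97, author = "J. Smith", title = {A Title}, year = 1997, }
# comes out, roughly, with one field per line:
#   @Article{smith97,
#   author = "J. Smith",
#   title = {A Title},
#   year = 1997,
#   }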
def bibtexwasher(filecont_source):
space_rex = re.compile('\s+')
comment_rex = re.compile('\s*%')
filecont = []
# remove trailing and excessive whitespace
# ignore comments
for line in filecont_source:
line = string.strip(line)
line = space_rex.sub(' ', line)
# ignore comments
if not comment_rex.match(line) and line != '':
filecont.append(' '+ line)
filecont = string.join(filecont, '')
# the file is in one long string
filecont = no_outer_parens(filecont)
#
# split lines according to preferred syntax scheme
#
filecont = re.sub('(=\s*{[^=]*)},', '\g<1>},\n', filecont)
# add new lines after commas that are after values
filecont = re.sub('"\s*,', '",\n', filecont)
filecont = re.sub('=\s*([\w\d]+)\s*,', '= \g<1>,\n', filecont)
filecont = re.sub('(@\w*)\s*({(\s*)[^,\s]*)\s*,',
'\n\n\g<1>\g<2>,\n', filecont)
# add new lines after }
filecont = re.sub('"\s*}','"\n}\n', filecont)
filecont = re.sub('}\s*,','},\n', filecont)
filecont = re.sub('@(\w*)', '\n@\g<1>', filecont)
# reserved latex characters: convert escaped ampersands ({\&} and \&) to &
filecont = re.sub('{\\\&}', '&', filecont)
filecont = re.sub('\\\&', '&', filecont)
# do checking for open braces to get format correct
open_brace_count = 0
brace_split = re.split('([{}])',filecont)
# rebuild filecont
filecont = ''
for phrase in brace_split:
if phrase == '{':
open_brace_count = open_brace_count + 1
elif phrase == '}':
open_brace_count = open_brace_count - 1
if open_brace_count == 0:
filecont = filecont + '\n'
filecont = filecont + phrase
filecont2 = bibtex_replace_abbreviations(filecont)
# gather the non-blank lines back into the list
filecont = filecont2.splitlines()
i=0
j=0 # count the number of blank lines
for line in filecont:
# ignore blank lines
if line == '' or line == ' ':
j = j+1
continue
filecont[i] = line + '\n'
i = i+1
# get rid of the extra stuff at the end of the array
# (The extra stuff are duplicates that are in the array because
# blank lines were removed.)
length = len( filecont)
filecont[length-j:length] = []
return filecont
def filehandler(filepath):
try:
fd = open(filepath, 'r')
filecont_source = fd.readlines()
fd.close()
except IOError:
print 'Could not open file:', filepath
return
washeddata = bibtexwasher(filecont_source)
outdata = bibtexdecoder(washeddata)
print '/**'
print '\page references References'
print
for line in outdata:
print line
print '*/'
# main program
def main():
import sys
if sys.argv[1:]:
filepath = sys.argv[1]
else:
print "No input file"
sys.exit()
filehandler(filepath)
if __name__ == "__main__": main()
# end python script
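#
# Usage sketch (the script and file names below are placeholders):
#   python bibtex2doxygen.py references.bib > references.dox
# The output is a doxygen comment block defining a "references" page, which
# can be included by listing the generated file in the doxygen INPUT.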