,
kpeter@790: entrycont = {}
kpeter@790: entry = []
kpeter@790: entrytype = pubtype_rex.sub('\g<1>',line)
kpeter@790: entrytype = string.lower(entrytype)
kpeter@792: entryid = pubtype_rex.sub('\g<2>', line)
kpeter@790:
kpeter@790: # end entry if just a }
kpeter@790: elif endtype_rex.match(line):
kpeter@790: # generate doxygen code for the entry
kpeter@790:
kpeter@790: # entry type related formatting
kpeter@790: if entrytype in ('book', 'inbook'):
kpeter@790: entrycont['title'] = '' + entrycont['title'] + ''
kpeter@790: if not entrycont.has_key('author'):
kpeter@790: entrycont['author'] = entrycont['editor']
kpeter@790: entrycont['author']['text'] += ', editors'
kpeter@790: elif entrytype == 'article':
kpeter@790: entrycont['journal'] = '' + entrycont['journal'] + ''
kpeter@790: elif entrytype in ('inproceedings', 'incollection', 'conference'):
kpeter@790: entrycont['booktitle'] = '' + entrycont['booktitle'] + ''
kpeter@790: elif entrytype == 'techreport':
kpeter@790: if not entrycont.has_key('type'):
kpeter@790: entrycont['type'] = 'Technical report'
kpeter@790: elif entrytype == 'mastersthesis':
kpeter@790: entrycont['type'] = 'Master\'s thesis'
kpeter@790: elif entrytype == 'phdthesis':
kpeter@790: entrycont['type'] = 'PhD thesis'
kpeter@790:
kpeter@790: for eline in entrycont:
kpeter@790: if eline != '':
kpeter@790: eline = latexreplacements(eline)
kpeter@790:
kpeter@790: if entrycont.has_key('pages') and (entrycont['pages'] != ''):
kpeter@790: entrycont['pages'] = string.replace(entrycont['pages'], '--', '-')
kpeter@790:
kpeter@790: if entrycont.has_key('author') and (entrycont['author'] != ''):
kpeter@790: entry.append(entrycont['author']['text'] + '.')
kpeter@790: if entrycont.has_key('title') and (entrycont['title'] != ''):
kpeter@790: entry.append(entrycont['title'] + '.')
kpeter@790: if entrycont.has_key('journal') and (entrycont['journal'] != ''):
kpeter@790: entry.append(entrycont['journal'] + ',')
kpeter@790: if entrycont.has_key('booktitle') and (entrycont['booktitle'] != ''):
kpeter@790: entry.append('In ' + entrycont['booktitle'] + ',')
kpeter@790: if entrycont.has_key('type') and (entrycont['type'] != ''):
kpeter@790: eline = entrycont['type']
kpeter@790: if entrycont.has_key('number') and (entrycont['number'] != ''):
kpeter@790: eline += ' ' + entrycont['number']
kpeter@790: eline += ','
kpeter@790: entry.append(eline)
kpeter@790: if entrycont.has_key('institution') and (entrycont['institution'] != ''):
kpeter@790: entry.append(entrycont['institution'] + ',')
kpeter@790: if entrycont.has_key('publisher') and (entrycont['publisher'] != ''):
kpeter@790: entry.append(entrycont['publisher'] + ',')
kpeter@790: if entrycont.has_key('school') and (entrycont['school'] != ''):
kpeter@790: entry.append(entrycont['school'] + ',')
kpeter@790: if entrycont.has_key('address') and (entrycont['address'] != ''):
kpeter@790: entry.append(entrycont['address'] + ',')
kpeter@790: if entrycont.has_key('edition') and (entrycont['edition'] != ''):
kpeter@790: entry.append(entrycont['edition'] + ' edition,')
kpeter@790: if entrycont.has_key('howpublished') and (entrycont['howpublished'] != ''):
kpeter@790: entry.append(entrycont['howpublished'] + ',')
kpeter@790: if entrycont.has_key('volume') and (entrycont['volume'] != ''):
kpeter@790: eline = entrycont['volume'];
kpeter@790: if entrycont.has_key('number') and (entrycont['number'] != ''):
kpeter@790: eline += '(' + entrycont['number'] + ')'
kpeter@790: if entrycont.has_key('pages') and (entrycont['pages'] != ''):
kpeter@790: eline += ':' + entrycont['pages']
kpeter@790: eline += ','
kpeter@790: entry.append(eline)
kpeter@790: else:
kpeter@790: if entrycont.has_key('pages') and (entrycont['pages'] != ''):
kpeter@790: entry.append('pages ' + entrycont['pages'] + ',')
kpeter@790: if entrycont.has_key('year') and (entrycont['year'] != ''):
kpeter@790: if entrycont.has_key('month') and (entrycont['month'] != ''):
kpeter@790: entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
kpeter@790: else:
kpeter@790: entry.append(entrycont['year'] + '.')
kpeter@790: if entrycont.has_key('note') and (entrycont['note'] != ''):
kpeter@790: entry.append(entrycont['note'] + '.')
kpeter@801: if entrycont.has_key('url') and (entrycont['url'] != ''):
kpeter@801: entry.append(entrycont['url'] + '.')
kpeter@790:
kpeter@790: # generate keys for sorting and for the output
kpeter@790: sortkey = ''
kpeter@790: bibkey = ''
kpeter@790: if entrycont.has_key('author'):
kpeter@790: for author in entrycont['author']['list']:
kpeter@790: sortkey += copychars(author, author.rfind(' ')+1, len(author))
kpeter@790: bibkey = entrycont['author']['abbrev']
kpeter@790: else:
kpeter@790: bibkey = 'x'
kpeter@790: if entrycont.has_key('year'):
kpeter@790: sortkey += entrycont['year']
kpeter@790: bibkey += entrycont['year'][-2:]
kpeter@790: if entrycont.has_key('title'):
kpeter@790: sortkey += entrycont['title']
kpeter@790: if entrycont.has_key('key'):
kpeter@790: sortkey = entrycont['key'] + sortkey
kpeter@790: bibkey = entrycont['key']
kpeter@790: entry.insert(0, sortkey)
kpeter@790: entry.insert(1, bibkey)
kpeter@792: entry.insert(2, entryid)
kpeter@790:
kpeter@790: # add the entry to the file contents
kpeter@790: filecont.append(entry)
kpeter@790:
kpeter@790: else:
kpeter@790: # field, publication info
kpeter@790: field = ''
kpeter@790: data = ''
kpeter@790:
kpeter@790: # field = {data} entries
kpeter@790: if bracedata_rex.match(line):
kpeter@790: field = bracefield_rex.sub('\g<1>', line)
kpeter@790: field = string.lower(field)
kpeter@790: data = bracedata_rex.sub('\g<2>', line)
kpeter@790:
kpeter@790: # field = "data" entries
kpeter@790: elif quotedata_rex.match(line):
kpeter@790: field = quotefield_rex.sub('\g<1>', line)
kpeter@790: field = string.lower(field)
kpeter@790: data = quotedata_rex.sub('\g<2>', line)
kpeter@790:
kpeter@790: # field = data entries
kpeter@790: elif data_rex.match(line):
kpeter@790: field = field_rex.sub('\g<1>', line)
kpeter@790: field = string.lower(field)
kpeter@790: data = data_rex.sub('\g<2>', line)
kpeter@801:
kpeter@801: if field == 'url':
kpeter@801: data = '\\url{' + data.strip() + '}'
kpeter@790:
kpeter@790: if field in ('author', 'editor'):
kpeter@790: entrycont[field] = bibtexauthor(data)
kpeter@790: line = ''
kpeter@790: elif field == 'title':
kpeter@790: line = bibtextitle(data, entrytype)
kpeter@790: elif field != '':
kpeter@790: line = removebraces(transformurls(data.strip()))
kpeter@790:
kpeter@790: if line != '':
kpeter@790: line = latexreplacements(line)
kpeter@790: entrycont[field] = line
kpeter@790:
kpeter@790:
kpeter@790: # sort entries
kpeter@790: filecont.sort(entry_cmp)
kpeter@790:
kpeter@790: # count the bibtex keys
kpeter@790: keytable = {}
kpeter@790: counttable = {}
kpeter@790: for entry in filecont:
kpeter@790: bibkey = entry[1]
kpeter@790: if not keytable.has_key(bibkey):
kpeter@790: keytable[bibkey] = 1
kpeter@790: else:
kpeter@790: keytable[bibkey] += 1
kpeter@790:
kpeter@790: for bibkey in keytable.keys():
kpeter@790: counttable[bibkey] = 0
kpeter@790:
kpeter@790: # generate output
kpeter@790: for entry in filecont:
kpeter@790: # generate output key from the bibtex key
kpeter@790: bibkey = entry[1]
kpeter@792: entryid = entry[2]
kpeter@790: if keytable[bibkey] == 1:
kpeter@790: outkey = bibkey
kpeter@790: else:
kpeter@790: outkey = bibkey + chr(97 + counttable[bibkey])
kpeter@790: counttable[bibkey] += 1
kpeter@790:
kpeter@790: # append the entry code to the output
kpeter@792: file.append('\\section ' + entryid + ' [' + outkey + ']')
kpeter@792: file.append('')
kpeter@792: for line in entry[3:]:
kpeter@790: file.append(line)
kpeter@792: file.append('
')
kpeter@790: file.append('')
kpeter@790:
kpeter@790: return file
kpeter@790:
kpeter@790:
def verify_out_of_braces(line, abbr):
    """Return 1 iff ``abbr`` is in ``line`` but not inside braces or quotes.

    ``line`` is cut into pieces with the module-level ``delimiter_rex``
    (defined elsewhere in this file; presumably it splits on ``{``, ``}``
    and ``"`` while keeping them as separate pieces -- confirm there).
    Brace and quote pieces update the nesting state; any other piece is
    searched for ``abbr`` as a whole word, ignoring case.

    Assumes that ``abbr`` appears only once on the line outside of braces
    and quotes.  Returns 0 when no such occurrence is found.
    """
    pieces = delimiter_rex.split(line)

    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    depth = 0    # number of currently open braces
    quoted = 0   # 1 while inside a double-quoted section, else 0

    for piece in pieces:
        if piece == '{':
            depth += 1
        elif piece == '}':
            depth -= 1
        elif piece == '"':
            # a quote character toggles the quoted state
            quoted = 1 - quoted
        elif word_rex.search(piece) and depth == 0 and quoted == 0:
            return 1

    return 0
kpeter@790:
kpeter@790:
def concat_line(line):
    """Join a ``field = phrase1 # phrase2 # ... # phrasen`` line.

    The result is ``field = phrase1 phrase2 ... phrasen`` with the inner
    quote/brace delimiters removed and the outer ones turned into braces.

    Relies on the module-level ``field_rex`` (groups 1 and 2 are used as the
    field name and the part after the equals sign) and ``concatsplit_rex``
    (used to split that part into phrases); both are defined elsewhere in
    this file.

    Bug: Doesn't always work with multiple abbreviations plugged in.
    """
    # only look at the part after the equals sign
    field = field_rex.sub('\\g<1>', line)
    rest = field_rex.sub('\\g<2>', line)

    parts = concatsplit_rex.split(rest)
    last = len(parts) - 1

    joined = field + ' ='

    for index, part in enumerate(parts):
        part = part.strip()

        # opening delimiter: the first phrase keeps one (as a brace),
        # every later phrase loses it
        if index == 0:
            if part.startswith('"'):
                part = part.replace('"', '{', 1)
        elif part.startswith(('"', '{')):
            part = part[1:]

        # closing delimiter: the last phrase keeps one (as a brace),
        # every earlier phrase loses it
        if index == last:
            if part.endswith('"'):
                part = part[:-1] + '}'
            elif part.endswith('",'):
                part = part[:-2] + '},'
        elif part.endswith(('"', '}')):
            part = part[:-1]

        # if the phrase did have \#, add the # back
        if part.endswith('\\'):
            part = part + '#'

        joined = joined + ' ' + part

    return joined
kpeter@790:
kpeter@790:
def bibtex_replace_abbreviations(filecont_source):
    """Substitute BibTeX abbreviations into the file contents.

    @param filecont_source - string of data from file (one logical item
                             per line)
    @return the processed data as a single newline-separated string

    The twelve month abbreviations are predefined; further abbreviations
    are collected from ``@string{name = value`` lines as they are met.
    ``@string``, ``@comment`` and ``@preamble`` lines are dropped from the
    output.  On every other line each known abbreviation that occurs
    outside braces and quotes (see ``verify_out_of_braces``) is replaced by
    its value, and ``#`` concatenations are merged with ``concat_line``.

    Uses the module-level ``valid_name_chars`` and ``concatsplit_rex``,
    which are defined elsewhere in this file.
    """
    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    front = '\\b'
    back = '(,?)\\b'

    # one compiled pattern per abbreviation, kept parallel to abbr_list
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)

    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = 0
    out_lines = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        if waiting_for_end_string:
            # Only a line containing '}' ends the skipped item (and is
            # dropped itself); any other line is processed as usual.
            if '}' in line:
                waiting_for_end_string = 0
                continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub('\\g<1>', line)

            # the first definition of an abbreviation wins
            if abbr not in abbr_list:
                val = abbrdef_rex.sub('\\g<2>', line)
                abbr_list.append(abbr)
                value_list.append(val.strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        for abbr, value, rex in zip(abbr_list, value_list, abbr_rex):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A callable replacement inserts the value literally.
                    # Passing it as a template string would interpret
                    # backslashes in LaTeX values (\n, \\, \1, ...) as
                    # escapes and corrupt the text or raise.
                    line = rex.sub(
                        lambda match, value=value: value + match.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        out_lines.append(line + '\n')

    filecont2 = ''.join(out_lines)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
kpeter@790:
def no_outer_parens(filecont):
    """Convert ``@type( ... )`` entries to ``@type{ ... }``.

    @param filecont - the bibtex data as one string
    @return the same data with the outer parentheses of every
            parenthesis-delimited entry replaced by braces

    The text is split on ``(``, ``)``, ``{`` and ``}``.  A ``(`` that
    directly follows a piece containing ``@`` opens an entry and becomes
    ``{``; the ``)`` that brings the parenthesis count back to zero closes
    it and becomes ``}``.  All other characters are copied unchanged.

    An ``@`` is only taken as the start of an entry at top level, i.e. not
    inside braces and not inside a still-open parenthesised entry.
    Otherwise an ``@`` inside a field value (an e-mail address, say) would
    cause the next ``(`` of that value to be rewritten as ``{``.
    """
    at_rex = re.compile(r'@\w*')

    open_paren_count = 0
    brace_depth = 0
    open_type = 0   # 1 while inside an entry that was opened with '('
    look_next = 0   # 1 right after an '@type' piece has been seen

    # pieces of the rebuilt text, joined once at the end
    pieces = []

    for phrase in re.split(r'([(){}])', filecont):
        if look_next == 1:
            look_next = 0
            if phrase == '(':
                # the delimiter that opens the entry: convert it
                open_paren_count = open_paren_count + 1
                pieces.append('{')
                continue
            # entry delimited by something else (normally a brace)
            open_type = 0

        if phrase == '(':
            open_paren_count = open_paren_count + 1

        elif phrase == ')':
            open_paren_count = open_paren_count - 1
            if open_type == 1 and open_paren_count == 0:
                phrase = '}'
                open_type = 0

        elif phrase == '{':
            brace_depth = brace_depth + 1

        elif phrase == '}':
            # never go negative on a stray closing brace
            if brace_depth > 0:
                brace_depth = brace_depth - 1

        elif (brace_depth == 0
              and not (open_type == 1 and open_paren_count > 0)
              and at_rex.search(phrase)):
            open_type = 1
            look_next = 1

        pieces.append(phrase)

    return ''.join(pieces)
kpeter@790:
kpeter@790:
def bibtexwasher(filecont_source):
    """Format the bibtex file into a usable form.

    @param filecont_source - iterable of the raw lines of the file
    @return list of non-blank lines, each terminated by a newline

    Steps, in order: strip each line and collapse runs of whitespace into
    one space, drop blank lines and ``%`` comment lines, join everything
    into one string, convert ``@type( ... )`` to ``@type{ ... }``
    (``no_outer_parens``), insert line breaks after field values and
    around entries, replace ``\\&`` by ``&``, put the brace that closes
    each top-level entry on its own line, and finally substitute
    abbreviations (``bibtex_replace_abbreviations``).
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace, ignore comments
    kept = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            kept.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(kept)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # the brace that brings the nesting level back to zero is moved to the
    # start of a new line
    open_brace_count = 0
    rebuilt = []
    for phrase in re.split(r'([{}])', filecont):
        if phrase == '{':
            open_brace_count = open_brace_count + 1
        elif phrase == '}':
            open_brace_count = open_brace_count - 1
            if open_brace_count == 0:
                rebuilt.append('\n')
        rebuilt.append(phrase)
    filecont = ''.join(rebuilt)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the non-blank lines, each with its newline restored
    return [line + '\n'
            for line in filecont2.splitlines()
            if line != '' and line != ' ']
kpeter@790:
kpeter@790:
def filehandler(filepath):
    """Convert the bibtex file at ``filepath`` and print a Doxygen page.

    The file is read, cleaned with ``bibtexwasher`` and converted with
    ``bibtexdecoder``; the resulting lines are printed to standard output
    wrapped in a ``/** ... */`` comment that starts with
    ``\\page references References``.

    If the file cannot be read, a message is printed and the function
    returns without producing any further output.
    """
    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even when reading fails
            fd.close()
    except (IOError, OSError):
        print('Could not open file: ' + str(filepath))
        # nothing was read, so there is nothing to convert
        return

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
kpeter@790:
kpeter@790:
# main program

def main():
    """Run the converter on the file named by the first command line argument.

    Prints "No input file" and exits when no argument was given.
    """
    import sys
    args = sys.argv[1:]
    if not args:
        print("No input file")
        sys.exit()
    filehandler(args[0])


if __name__ == "__main__":
    main()
kpeter@790:
kpeter@790:
kpeter@790: # end python script