,
- entrycont = {}
- entry = []
- entrytype = pubtype_rex.sub('\g<1>',line)
- entrytype = string.lower(entrytype)
- entryid = pubtype_rex.sub('\g<2>', line)
-
- # end entry if just a }
- elif endtype_rex.match(line):
- # generate doxygen code for the entry
-
- # enty type related formattings
- if entrytype in ('book', 'inbook'):
- entrycont['title'] = '' + entrycont['title'] + ''
- if not entrycont.has_key('author'):
- entrycont['author'] = entrycont['editor']
- entrycont['author']['text'] += ', editors'
- elif entrytype == 'article':
- entrycont['journal'] = '' + entrycont['journal'] + ''
- elif entrytype in ('inproceedings', 'incollection', 'conference'):
- entrycont['booktitle'] = '' + entrycont['booktitle'] + ''
- elif entrytype == 'techreport':
- if not entrycont.has_key('type'):
- entrycont['type'] = 'Technical report'
- elif entrytype == 'mastersthesis':
- entrycont['type'] = 'Master\'s thesis'
- elif entrytype == 'phdthesis':
- entrycont['type'] = 'PhD thesis'
-
- for eline in entrycont:
- if eline != '':
- eline = latexreplacements(eline)
-
- if entrycont.has_key('pages') and (entrycont['pages'] != ''):
- entrycont['pages'] = string.replace(entrycont['pages'], '--', '-')
-
- if entrycont.has_key('author') and (entrycont['author'] != ''):
- entry.append(entrycont['author']['text'] + '.')
- if entrycont.has_key('title') and (entrycont['title'] != ''):
- entry.append(entrycont['title'] + '.')
- if entrycont.has_key('journal') and (entrycont['journal'] != ''):
- entry.append(entrycont['journal'] + ',')
- if entrycont.has_key('booktitle') and (entrycont['booktitle'] != ''):
- entry.append('In ' + entrycont['booktitle'] + ',')
- if entrycont.has_key('type') and (entrycont['type'] != ''):
- eline = entrycont['type']
- if entrycont.has_key('number') and (entrycont['number'] != ''):
- eline += ' ' + entrycont['number']
- eline += ','
- entry.append(eline)
- if entrycont.has_key('institution') and (entrycont['institution'] != ''):
- entry.append(entrycont['institution'] + ',')
- if entrycont.has_key('publisher') and (entrycont['publisher'] != ''):
- entry.append(entrycont['publisher'] + ',')
- if entrycont.has_key('school') and (entrycont['school'] != ''):
- entry.append(entrycont['school'] + ',')
- if entrycont.has_key('address') and (entrycont['address'] != ''):
- entry.append(entrycont['address'] + ',')
- if entrycont.has_key('edition') and (entrycont['edition'] != ''):
- entry.append(entrycont['edition'] + ' edition,')
- if entrycont.has_key('howpublished') and (entrycont['howpublished'] != ''):
- entry.append(entrycont['howpublished'] + ',')
- if entrycont.has_key('volume') and (entrycont['volume'] != ''):
- eline = entrycont['volume'];
- if entrycont.has_key('number') and (entrycont['number'] != ''):
- eline += '(' + entrycont['number'] + ')'
- if entrycont.has_key('pages') and (entrycont['pages'] != ''):
- eline += ':' + entrycont['pages']
- eline += ','
- entry.append(eline)
- else:
- if entrycont.has_key('pages') and (entrycont['pages'] != ''):
- entry.append('pages ' + entrycont['pages'] + ',')
- if entrycont.has_key('year') and (entrycont['year'] != ''):
- if entrycont.has_key('month') and (entrycont['month'] != ''):
- entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
- else:
- entry.append(entrycont['year'] + '.')
- if entrycont.has_key('note') and (entrycont['note'] != ''):
- entry.append(entrycont['note'] + '.')
- if entrycont.has_key('url') and (entrycont['url'] != ''):
- entry.append(entrycont['url'] + '.')
-
- # generate keys for sorting and for the output
- sortkey = ''
- bibkey = ''
- if entrycont.has_key('author'):
- for author in entrycont['author']['list']:
- sortkey += copychars(author, author.rfind(' ')+1, len(author))
- bibkey = entrycont['author']['abbrev']
- else:
- bibkey = 'x'
- if entrycont.has_key('year'):
- sortkey += entrycont['year']
- bibkey += entrycont['year'][-2:]
- if entrycont.has_key('title'):
- sortkey += entrycont['title']
- if entrycont.has_key('key'):
- sortkey = entrycont['key'] + sortkey
- bibkey = entrycont['key']
- entry.insert(0, sortkey)
- entry.insert(1, bibkey)
- entry.insert(2, entryid)
-
- # add the entry to the file contents
- filecont.append(entry)
-
- else:
- # field, publication info
- field = ''
- data = ''
-
- # field = {data} entries
- if bracedata_rex.match(line):
- field = bracefield_rex.sub('\g<1>', line)
- field = string.lower(field)
- data = bracedata_rex.sub('\g<2>', line)
-
- # field = "data" entries
- elif quotedata_rex.match(line):
- field = quotefield_rex.sub('\g<1>', line)
- field = string.lower(field)
- data = quotedata_rex.sub('\g<2>', line)
-
- # field = data entries
- elif data_rex.match(line):
- field = field_rex.sub('\g<1>', line)
- field = string.lower(field)
- data = data_rex.sub('\g<2>', line)
-
- if field == 'url':
- data = '\\url{' + data.strip() + '}'
-
- if field in ('author', 'editor'):
- entrycont[field] = bibtexauthor(data)
- line = ''
- elif field == 'title':
- line = bibtextitle(data, entrytype)
- elif field != '':
- line = removebraces(transformurls(data.strip()))
-
- if line != '':
- line = latexreplacements(line)
- entrycont[field] = line
-
-
- # sort entries
- filecont.sort(entry_cmp)
-
- # count the bibtex keys
- keytable = {}
- counttable = {}
- for entry in filecont:
- bibkey = entry[1]
- if not keytable.has_key(bibkey):
- keytable[bibkey] = 1
- else:
- keytable[bibkey] += 1
-
- for bibkey in keytable.keys():
- counttable[bibkey] = 0
-
- # generate output
- for entry in filecont:
- # generate output key form the bibtex key
- bibkey = entry[1]
- entryid = entry[2]
- if keytable[bibkey] == 1:
- outkey = bibkey
- else:
- outkey = bibkey + chr(97 + counttable[bibkey])
- counttable[bibkey] += 1
-
- # append the entry code to the output
- file.append('\\section ' + entryid + ' [' + outkey + ']')
- file.append('')
- for line in entry[3:]:
- file.append(line)
- file.append('
')
- file.append('')
-
- return file
-
-
-#
-# return 1 iff abbr is in line but not inside braces or quotes
-# assumes that abbr appears only once on the line (out of braces and quotes)
-#
def verify_out_of_braces(line, abbr):
    """Report whether ``abbr`` occurs in ``line`` outside braces and quotes.

    The line is split with the module-level ``delimiter_rex``.  Pieces equal
    to ``{``, ``}`` or ``"`` update the nesting state; every other piece is
    searched for ``abbr`` as a whole word, ignoring case.

    Returns 1 as soon as a match is found while no brace and no quote is
    open, otherwise 0.  Integers (not booleans) are returned because the
    caller compares the result with ``== 1``.
    """
    # Escape the abbreviation so that any regex metacharacter it contains is
    # matched literally instead of being interpreted as pattern syntax.
    abbr_rex = re.compile(r'\b' + re.escape(abbr) + r'\b', re.I)

    open_brace = 0
    in_quote = False

    for phrase in delimiter_rex.split(line):
        if phrase == '{':
            open_brace += 1
        elif phrase == '}':
            open_brace -= 1
        elif phrase == '"':
            # A double quote toggles between "inside" and "outside".
            in_quote = not in_quote
        elif abbr_rex.search(phrase) and open_brace == 0 and not in_quote:
            return 1

    return 0
-
-
-#
-# a line in the form phrase1 # phrase2 # ... # phrasen
-# is returned as phrase1 phrase2 ... phrasen
-# with the correct punctuation
-# Bug: Doesn't always work with multiple abbreviations plugged in
-#
def concat_line(line):
    """Collapse a ``field = phrase1 # phrase2 # ...`` line into one value.

    The field name and the text after it are extracted with the
    module-level ``field_rex``; the remainder is split with
    ``concatsplit_rex``.  Each phrase is stripped and its delimiters are
    adjusted so that the joined result reads as one value:

    * first phrase: a leading ``"`` is replaced by ``{``;
    * later phrases: a leading ``"`` or ``{`` is dropped;
    * all but the last phrase: a trailing ``"`` or ``}`` is dropped;
    * last phrase: a trailing ``"`` becomes ``}`` and ``",`` becomes ``},``.

    Returns ``"<field> = <phrase1> <phrase2> ..."`` with single spaces
    between the parts.
    """
    # Raw strings: ``\g`` is not a valid string escape in a normal literal.
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    phrases = concatsplit_rex.split(rest)
    last_index = len(phrases) - 1
    pieces = [field + ' =']

    for index, phrase in enumerate(phrases):
        phrase = phrase.strip()

        # NOTE(review): the source indentation was lost; the ``elif`` is
        # paired with the "not the first phrase" test because that is the
        # only reading in which it is reachable.
        if index != 0:
            if phrase.startswith(('"', '{')):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            phrase = '{' + phrase[1:]

        if index != last_index:
            if phrase.endswith(('"', '}')):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            phrase = phrase[:-1] + '}'
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + '},'

        # A phrase ending in a backslash lost the '#' of an escaped ``\#``
        # to the split; put it back.
        if phrase.endswith('\\'):
            phrase += '#'

        pieces.append(phrase)

    return ' '.join(pieces)
-
-
-#
-# substitute abbreviations into filecont
-# @param filecont_source - string of data from file
-#
def bibtex_replace_abbreviations(filecont_source):
    """Substitute ``@string`` abbreviations into the file contents.

    @param filecont_source - string of data from file

    The month abbreviations ``jan`` .. ``dec`` are predefined.  Every line
    matching an ``@string{name = value`` definition adds a new abbreviation
    (unless the name is already known) and is left out of the output, as are
    lines that open an ``@comment{`` or ``@preamble{`` block and the line
    containing the ``}`` that ends such a block.  On all other lines each
    known abbreviation found outside braces and quotes (as decided by
    ``verify_out_of_braces``) is replaced by its value, and ``#``
    concatenations are merged with ``concat_line``.

    Returns the processed text as a single string, one line per ``\\n``.
    """
    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    def make_abbr_rex(name):
        # Whole-word, case-insensitive match with an optional trailing
        # comma captured so that it can be re-emitted after the value.
        # The name is escaped so it is always matched literally.
        return re.compile(r'\b' + re.escape(name) + r'(,?)\b', re.I)

    abbr_rex = [make_abbr_rex(name) for name in abbr_list]

    abbrdef_rex = re.compile(
        r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)
    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = False
    kept_lines = []

    for line in filecont_source.splitlines():
        if line in ('', ' '):
            continue

        # NOTE(review): the source indentation was lost.  The ``continue``
        # is kept inside the '}' test, so a line without '}' is still
        # processed while waiting -- confirm against the upstream script.
        if waiting_for_end_string and '}' in line:
            waiting_for_end_string = False
            continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub(r'\g<1>', line)
            if abbr not in abbr_list:
                val = abbrdef_rex.sub(r'\g<2>', line)
                abbr_list.append(abbr)
                value_list.append(val.strip())
                abbr_rex.append(make_abbr_rex(abbr))
            waiting_for_end_string = True
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = True
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if not rex.search(line):
                continue
            if verify_out_of_braces(line, abbr) == 1:
                # Use a function replacement so that backslashes in the
                # value (e.g. LaTeX commands) are inserted verbatim and not
                # interpreted as regex template escapes.
                line = rex.sub(
                    lambda match, value=value: value + match.group(1), line)
            # Check for # concatenations
            if concatsplit_rex.search(line):
                line = concat_line(line)

        kept_lines.append(line)

    filecont2 = ''.join(kept + '\n' for kept in kept_lines)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
-
-#
-# convert @type( ... ) to @type{ ... }
-#
def no_outer_parens(filecont):
    """Convert ``@type( ... )`` entry delimiters to ``@type{ ... }``.

    The text is split on parentheses and braces.  When a piece containing
    ``@`` is immediately followed by ``(``, that parenthesis is rewritten to
    ``{`` and the ``)`` that brings the parenthesis depth back to zero is
    rewritten to ``}``.  All other text, including parentheses nested inside
    the entry and every brace, is copied through unchanged.

    Returns the rebuilt string.
    """
    # NOTE: '\w*' may match nothing, so any piece containing '@' counts as
    # the start of an entry type.
    at_rex = re.compile(r'@\w*')

    open_paren_count = 0
    open_type = False   # inside an entry that was opened with '('
    look_next = False   # previous piece contained '@'; inspect this one

    # Collect the pieces and join once instead of concatenating repeatedly.
    pieces = []

    for phrase in re.split(r'([(){}])', filecont):
        if look_next:
            if phrase == '(':
                phrase = '{'
                open_paren_count += 1
            else:
                open_type = False
            look_next = False

        if phrase == '(':
            open_paren_count += 1
        elif phrase == ')':
            open_paren_count -= 1
            if open_type and open_paren_count == 0:
                phrase = '}'
                open_type = False
        elif at_rex.search(phrase):
            open_type = True
            look_next = True

        pieces.append(phrase)

    return ''.join(pieces)
-
-
-#
-# make all whitespace into just one space
-# format the bibtex file into a usable form.
-#
def bibtexwasher(filecont_source):
    """Normalise raw BibTeX lines into a one-item-per-line layout.

    ``filecont_source`` is an iterable of text lines (``filehandler`` passes
    the result of ``readlines()``).  The steps are:

    1. strip each line, collapse whitespace runs to one space, and drop
       empty lines and lines starting with ``%``;
    2. join everything into one string and convert ``@type( ... )`` to
       ``@type{ ... }`` via ``no_outer_parens``;
    3. insert line breaks after field values, after entry keys, before each
       ``@`` and before a ``}`` that returns the brace depth to zero;
    4. replace ``{\\&}`` and ``\\&`` by a plain ``&``;
    5. expand abbreviations via ``bibtex_replace_abbreviations``.

    Returns a list of the resulting non-blank lines, each ending in ``\\n``.
    """
    space_rex = re.compile(r'\s+')

    # remove trailing and excessive whitespace, ignore comments
    cleaned = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        # The line is already stripped, so a comment starts with '%'.
        if line != '' and not line.startswith('%'):
            cleaned.append(' ' + line)

    # the file is in one long string
    filecont = no_outer_parens(''.join(cleaned))

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*{[^=]*)},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*}', '"\n}\n', filecont)
    filecont = re.sub(r'}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'{\\&}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct: a '}' that closes
    # the outermost brace is moved onto its own line
    open_brace_count = 0
    pieces = []
    for phrase in re.split(r'([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                pieces.append('\n')
        pieces.append(phrase)

    washed = bibtex_replace_abbreviations(''.join(pieces))

    # gather the non-blank lines, restoring the line terminator
    return [line + '\n'
            for line in washed.splitlines()
            if line not in ('', ' ')]
-
-
def filehandler(filepath):
    """Convert the BibTeX file at ``filepath`` and print the result.

    The file is read, passed through ``bibtexwasher`` and
    ``bibtexdecoder``, and the decoded lines are printed to standard output
    wrapped in a ``/** ... */`` comment that starts with a
    ``\\page references References`` line.

    If the file cannot be opened or read, a message is printed and the
    function returns without producing any other output.
    """
    try:
        # ``with`` closes the file even if reading fails part-way.
        with open(filepath, 'r') as fd:
            filecont_source = fd.readlines()
    except (IOError, OSError):
        print('Could not open file: %s' % filepath)
        # Nothing was read, so there is nothing to convert.
        return

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
-
-
-# main program
-
def main():
    """Run the converter on the file named by the first command-line argument.

    When no argument is given, the message ``No input file`` is written to
    standard error and the process exits with a non-zero status, so that
    calling scripts can detect the failure.
    """
    import sys

    if len(sys.argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit('No input file')

    filehandler(sys.argv[1])


if __name__ == "__main__":
    main()
-
-
-# end python script