COIN-OR::LEMON - Graph Library

Context Navigation

source: lemon-main/scripts/bib2dox.py @ 791:4e3484a2e90c

Last change on this file since 791:4e3484a2e90c was 754:2de0fc630899, checked in by Peter Kovacs <kpeter@…>, 15 years ago
Handle url fields in bib2dox.py (#184) and modify the bibtex file using url fields.
File size: 25.6 KB

Rev	Line
[743]	1	#!/usr/bin/env /usr/local/Python/bin/python2.1
	2	"""
	3	BibTeX to Doxygen converter
	4	Usage: python bib2dox.py bibfile.bib > bibfile.dox
	5
	6	This code is the modification of the BibTeX to XML converter
	7	by Vidar Bronken Gundersen et al. See the original copyright notices below.
	8
	9	**********************************************************************
	10
	11	Decoder for bibliographic data, BibTeX
	12	Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
	13
	14	v.8
	15	(c)2002-06-23 Vidar Bronken Gundersen
	16	http://bibtexml.sf.net/
	17	Reuse approved as long as this notification is kept.
	18	Licence: GPL.
	19
	20	Contributions/thanks to:
	21	Egon Willighagen, http://sf.net/projects/jreferences/
	22	Richard Mahoney (for providing a test case)
	23
	24	Edited by Sara Sprenkle to be more robust and handle more bibtex features.
	25	(c) 2003-01-15
	26
	27	1. Changed bibtex: tags to bibxml: tags.
	28	2. Use xmlns:bibxml="http://bibtexml.sf.net/"
	29	3. Allow spaces between @type and first {
	30	4. "author" fields with multiple authors split by " and "
	31	are put in separate xml "bibxml:author" tags.
	32	5. Option for Titles: words are capitalized
	33	only if first letter in title or capitalized inside braces
	34	6. Removes braces from within field values
	35	7. Ignores comments in bibtex file (including @comment{ or % )
	36	8. Replaces some special latex tags, e.g., replaces ~ with ' '
	37	9. Handles bibtex @string abbreviations
	38	--> includes bibtex's default abbreviations for months
	39	--> does concatenation of abbr # " more " and " more " # abbr
	40	10. Handles @type( ... ) or @type{ ... }
	41	11. The keywords field is split on , or ; and put into separate xml
	42	"bibxml:keywords" tags
	43	12. Ignores @preamble
	44
	45	Known Limitations
	46	1. Does not transform Latex encoding like math mode and special
	47	latex symbols.
	48	2. Does not parse author fields into first and last names.
	49	E.g., It does not do anything special to an author whose name is
	50	in the form LAST_NAME, FIRST_NAME
	51	In "author" tag, will show up as
	52	<bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
	53	3. Does not handle "crossref" fields other than to print
	54	<bibxml:crossref>...</bibxml:crossref>
	55	4. Does not inform user of the input's format errors. You just won't
	56	be able to transform the file later with XSL
	57
	58	You will have to manually edit the XML output if you need to handle
	59	these (and unknown) limitations.
	60
	61	"""
	62
	63	import string, re
	64
	65	# set of valid name characters
	66	valid_name_chars = '[\w\-:]'
	67
	68	#
	69	# define global regular expression variables
	70	#
	71	author_rex = re.compile('\s+and\s+')
	72	rembraces_rex = re.compile('[{}]')
[754]	73	capitalize_rex = re.compile('({[^}]*})')
[743]	74
	75	# used by bibtexkeywords(data)
	76	keywords_rex = re.compile('[,;]')
	77
	78	# used by concat_line(line)
	79	concatsplit_rex = re.compile('\s#\s')
	80
	81	# split on {, }, or " in verify_out_of_braces
	82	delimiter_rex = re.compile('([{}"])',re.I)
	83
	84	field_rex = re.compile('\s(\w)\s=\s(.*)')
	85	data_rex = re.compile('\s(\w)\s=\s([^,]*),?')
	86
	87	url_rex = re.compile('\\\url\{([^}]*)\}')
	88
[745]	89	#
	90	# styles for html formatting
	91	#
	92	divstyle = 'margin-top: -4ex; margin-left: 8em;'
[743]	93
	94	#
	95	# return the string parameter without braces
	96	#
	97	def transformurls(str):
	98	return url_rex.sub(r'<a href="\1">\1</a>', str)
	99
	100	#
	101	# return the string parameter without braces
	102	#
	103	def removebraces(str):
	104	return rembraces_rex.sub('', str)
	105
	106	#
	107	# latex-specific replacements
	108	# (do this after braces were removed)
	109	#
	110	def latexreplacements(line):
	111	line = string.replace(line, '~', ' ')
	112	line = string.replace(line, '\\\'a', 'á')
	113	line = string.replace(line, '\\"a', 'ä')
	114	line = string.replace(line, '\\\'e', 'é')
	115	line = string.replace(line, '\\"e', 'ë')
	116	line = string.replace(line, '\\\'i', 'í')
	117	line = string.replace(line, '\\"i', 'ï')
	118	line = string.replace(line, '\\\'o', 'ó')
	119	line = string.replace(line, '\\"o', 'ö')
	120	line = string.replace(line, '\\\'u', 'ú')
	121	line = string.replace(line, '\\"u', 'ü')
	122	line = string.replace(line, '\\H o', 'õ')
	123	line = string.replace(line, '\\H u', 'ü') # &utilde; does not exist
	124	line = string.replace(line, '\\\'A', 'Á')
	125	line = string.replace(line, '\\"A', 'Ä')
	126	line = string.replace(line, '\\\'E', 'É')
	127	line = string.replace(line, '\\"E', 'Ë')
	128	line = string.replace(line, '\\\'I', 'Í')
	129	line = string.replace(line, '\\"I', 'Ï')
	130	line = string.replace(line, '\\\'O', 'Ó')
	131	line = string.replace(line, '\\"O', 'Ö')
	132	line = string.replace(line, '\\\'U', 'Ú')
	133	line = string.replace(line, '\\"U', 'Ü')
	134	line = string.replace(line, '\\H O', 'Õ')
	135	line = string.replace(line, '\\H U', 'Ü') # &Utilde; does not exist
	136
	137	return line
	138
	139	#
	140	# copy characters form a string decoding html expressions (&xyz;)
	141	#
	142	def copychars(str, ifrom, count):
	143	result = ''
	144	i = ifrom
	145	c = 0
	146	html_spec = False
	147	while (i < len(str)) and (c < count):
	148	if str[i] == '&':
	149	html_spec = True;
	150	if i+1 < len(str):
	151	result += str[i+1]
	152	c += 1
	153	i += 2
	154	else:
	155	if not html_spec:
	156	if ((str[i] >= 'A') and (str[i] <= 'Z')) or \
	157	((str[i] >= 'a') and (str[i] <= 'z')):
	158	result += str[i]
	159	c += 1
	160	elif str[i] == ';':
	161	html_spec = False;
	162	i += 1
	163
	164	return result
	165
	166
	167	#
	168	# Handle a list of authors (separated by 'and').
	169	# It gives back an array of the follwing values:
	170	# - num: the number of authors,
	171	# - list: the list of the author names,
	172	# - text: the bibtex text (separated by commas and/or 'and')
	173	# - abbrev: abbreviation that can be used for indicate the
	174	# bibliography entries
	175	#
	176	def bibtexauthor(data):
	177	result = {}
	178	bibtex = ''
	179	result['list'] = author_rex.split(data)
	180	result['num'] = len(result['list'])
	181	for i, author in enumerate(result['list']):
	182	# general transformations
	183	author = latexreplacements(removebraces(author.strip()))
	184	# transform "Xyz, A. B." to "A. B. Xyz"
	185	pos = author.find(',')
	186	if pos != -1:
	187	author = author[pos+1:].strip() + ' ' + author[:pos].strip()
	188	result['list'][i] = author
	189	bibtex += author + '#'
	190	bibtex = bibtex[:-1]
	191	if result['num'] > 1:
	192	ix = bibtex.rfind('#')
	193	if result['num'] == 2:
	194	bibtex = bibtex[:ix] + ' and ' + bibtex[ix+1:]
	195	else:
	196	bibtex = bibtex[:ix] + ', and ' + bibtex[ix+1:]
	197	bibtex = bibtex.replace('#', ', ')
	198	result['text'] = bibtex
	199
	200	result['abbrev'] = ''
	201	for author in result['list']:
	202	pos = author.rfind(' ') + 1
	203	count = 1
	204	if result['num'] == 1:
	205	count = 3
	206	result['abbrev'] += copychars(author, pos, count)
	207
	208	return result
	209
	210
	211	#
	212	# data = title string
	213	# @return the capitalized title (first letter is capitalized), rest are capitalized
	214	# only if capitalized inside braces
	215	#
	216	def capitalizetitle(data):
	217	title_list = capitalize_rex.split(data)
	218	title = ''
	219	count = 0
	220	for phrase in title_list:
	221	check = string.lstrip(phrase)
	222
	223	# keep phrase's capitalization the same
	224	if check.find('{') == 0:
	225	title += removebraces(phrase)
	226	else:
	227	# first word --> capitalize first letter (after spaces)
	228	if count == 0:
	229	title += check.capitalize()
	230	else:
	231	title += phrase.lower()
	232	count = count + 1
	233
	234	return title
	235
	236
	237	#
	238	# @return the bibtex for the title
	239	# @param data --> title string
	240	# braces are removed from title
	241	#
	242	def bibtextitle(data, entrytype):
	243	if entrytype in ('book', 'inbook'):
	244	title = removebraces(data.strip())
	245	else:
	246	title = removebraces(capitalizetitle(data.strip()))
	247	bibtex = title
	248	return bibtex
	249
	250
	251	#
	252	# function to compare entry lists
	253	#
	254	def entry_cmp(x, y):
	255	return cmp(x[0], y[0])
	256
	257
	258	#
	259	# print the XML for the transformed "filecont_source"
	260	#
	261	def bibtexdecoder(filecont_source):
	262	filecont = []
	263	file = []
	264
	265	# want @<alphanumeric chars><spaces>{<spaces><any chars>,
	266	pubtype_rex = re.compile('@(\w)\s{\s(.),')
	267	endtype_rex = re.compile('}\s*$')
	268	endtag_rex = re.compile('^\s}\s$')
	269
	270	bracefield_rex = re.compile('\s(\w)\s=\s(.*)')
	271	bracedata_rex = re.compile('\s(\w)\s=\s{(.*)},?')
	272
	273	quotefield_rex = re.compile('\s(\w)\s=\s(.*)')
	274	quotedata_rex = re.compile('\s(\w)\s=\s"(.*)",?')
	275
	276	for line in filecont_source:
	277	line = line[:-1]
	278
	279	# encode character entities
	280	line = string.replace(line, '&', '&')
	281	line = string.replace(line, '<', '<')
	282	line = string.replace(line, '>', '>')
	283
	284	# start entry: publication type (store for later use)
	285	if pubtype_rex.match(line):
	286	# want @<alphanumeric chars><spaces>{<spaces><any chars>,
	287	entrycont = {}
	288	entry = []
	289	entrytype = pubtype_rex.sub('\g<1>',line)
	290	entrytype = string.lower(entrytype)
[745]	291	entryid = pubtype_rex.sub('\g<2>', line)
[743]	292
	293	# end entry if just a }
	294	elif endtype_rex.match(line):
	295	# generate doxygen code for the entry
	296
	297	# enty type related formattings
	298	if entrytype in ('book', 'inbook'):
	299	entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
	300	if not entrycont.has_key('author'):
	301	entrycont['author'] = entrycont['editor']
	302	entrycont['author']['text'] += ', editors'
	303	elif entrytype == 'article':
	304	entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
	305	elif entrytype in ('inproceedings', 'incollection', 'conference'):
	306	entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
	307	elif entrytype == 'techreport':
	308	if not entrycont.has_key('type'):
	309	entrycont['type'] = 'Technical report'
	310	elif entrytype == 'mastersthesis':
	311	entrycont['type'] = 'Master\'s thesis'
	312	elif entrytype == 'phdthesis':
	313	entrycont['type'] = 'PhD thesis'
	314
	315	for eline in entrycont:
	316	if eline != '':
	317	eline = latexreplacements(eline)
	318
	319	if entrycont.has_key('pages') and (entrycont['pages'] != ''):
	320	entrycont['pages'] = string.replace(entrycont['pages'], '--', '-')
	321
	322	if entrycont.has_key('author') and (entrycont['author'] != ''):
	323	entry.append(entrycont['author']['text'] + '.')
	324	if entrycont.has_key('title') and (entrycont['title'] != ''):
	325	entry.append(entrycont['title'] + '.')
	326	if entrycont.has_key('journal') and (entrycont['journal'] != ''):
	327	entry.append(entrycont['journal'] + ',')
	328	if entrycont.has_key('booktitle') and (entrycont['booktitle'] != ''):
	329	entry.append('In ' + entrycont['booktitle'] + ',')
	330	if entrycont.has_key('type') and (entrycont['type'] != ''):
	331	eline = entrycont['type']
	332	if entrycont.has_key('number') and (entrycont['number'] != ''):
	333	eline += ' ' + entrycont['number']
	334	eline += ','
	335	entry.append(eline)
	336	if entrycont.has_key('institution') and (entrycont['institution'] != ''):
	337	entry.append(entrycont['institution'] + ',')
	338	if entrycont.has_key('publisher') and (entrycont['publisher'] != ''):
	339	entry.append(entrycont['publisher'] + ',')
	340	if entrycont.has_key('school') and (entrycont['school'] != ''):
	341	entry.append(entrycont['school'] + ',')
	342	if entrycont.has_key('address') and (entrycont['address'] != ''):
	343	entry.append(entrycont['address'] + ',')
	344	if entrycont.has_key('edition') and (entrycont['edition'] != ''):
	345	entry.append(entrycont['edition'] + ' edition,')
	346	if entrycont.has_key('howpublished') and (entrycont['howpublished'] != ''):
	347	entry.append(entrycont['howpublished'] + ',')
	348	if entrycont.has_key('volume') and (entrycont['volume'] != ''):
	349	eline = entrycont['volume'];
	350	if entrycont.has_key('number') and (entrycont['number'] != ''):
	351	eline += '(' + entrycont['number'] + ')'
	352	if entrycont.has_key('pages') and (entrycont['pages'] != ''):
	353	eline += ':' + entrycont['pages']
	354	eline += ','
	355	entry.append(eline)
	356	else:
	357	if entrycont.has_key('pages') and (entrycont['pages'] != ''):
	358	entry.append('pages ' + entrycont['pages'] + ',')
	359	if entrycont.has_key('year') and (entrycont['year'] != ''):
	360	if entrycont.has_key('month') and (entrycont['month'] != ''):
	361	entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
	362	else:
	363	entry.append(entrycont['year'] + '.')
	364	if entrycont.has_key('note') and (entrycont['note'] != ''):
	365	entry.append(entrycont['note'] + '.')
[754]	366	if entrycont.has_key('url') and (entrycont['url'] != ''):
	367	entry.append(entrycont['url'] + '.')
[743]	368
	369	# generate keys for sorting and for the output
	370	sortkey = ''
	371	bibkey = ''
	372	if entrycont.has_key('author'):
	373	for author in entrycont['author']['list']:
	374	sortkey += copychars(author, author.rfind(' ')+1, len(author))
	375	bibkey = entrycont['author']['abbrev']
	376	else:
	377	bibkey = 'x'
	378	if entrycont.has_key('year'):
	379	sortkey += entrycont['year']
	380	bibkey += entrycont['year'][-2:]
	381	if entrycont.has_key('title'):
	382	sortkey += entrycont['title']
	383	if entrycont.has_key('key'):
	384	sortkey = entrycont['key'] + sortkey
	385	bibkey = entrycont['key']
	386	entry.insert(0, sortkey)
	387	entry.insert(1, bibkey)
[745]	388	entry.insert(2, entryid)
[743]	389
	390	# add the entry to the file contents
	391	filecont.append(entry)
	392
	393	else:
	394	# field, publication info
	395	field = ''
	396	data = ''
	397
	398	# field = {data} entries
	399	if bracedata_rex.match(line):
	400	field = bracefield_rex.sub('\g<1>', line)
	401	field = string.lower(field)
	402	data = bracedata_rex.sub('\g<2>', line)
	403
	404	# field = "data" entries
	405	elif quotedata_rex.match(line):
	406	field = quotefield_rex.sub('\g<1>', line)
	407	field = string.lower(field)
	408	data = quotedata_rex.sub('\g<2>', line)
	409
	410	# field = data entries
	411	elif data_rex.match(line):
	412	field = field_rex.sub('\g<1>', line)
	413	field = string.lower(field)
	414	data = data_rex.sub('\g<2>', line)
[754]	415
	416	if field == 'url':
	417	data = '\\url{' + data.strip() + '}'
[743]	418
	419	if field in ('author', 'editor'):
	420	entrycont[field] = bibtexauthor(data)
	421	line = ''
	422	elif field == 'title':
	423	line = bibtextitle(data, entrytype)
	424	elif field != '':
	425	line = removebraces(transformurls(data.strip()))
	426
	427	if line != '':
	428	line = latexreplacements(line)
	429	entrycont[field] = line
	430
	431
	432	# sort entries
	433	filecont.sort(entry_cmp)
	434
	435	# count the bibtex keys
	436	keytable = {}
	437	counttable = {}
	438	for entry in filecont:
	439	bibkey = entry[1]
	440	if not keytable.has_key(bibkey):
	441	keytable[bibkey] = 1
	442	else:
	443	keytable[bibkey] += 1
	444
	445	for bibkey in keytable.keys():
	446	counttable[bibkey] = 0
	447
	448	# generate output
	449	for entry in filecont:
	450	# generate output key form the bibtex key
	451	bibkey = entry[1]
[745]	452	entryid = entry[2]
[743]	453	if keytable[bibkey] == 1:
	454	outkey = bibkey
	455	else:
	456	outkey = bibkey + chr(97 + counttable[bibkey])
	457	counttable[bibkey] += 1
	458
	459	# append the entry code to the output
[745]	460	file.append('\\section ' + entryid + ' [' + outkey + ']')
	461	file.append('<div style="' + divstyle + '">')
	462	for line in entry[3:]:
[743]	463	file.append(line)
[745]	464	file.append('</div>')
[743]	465	file.append('')
	466
	467	return file
	468
	469
	470	#
	471	# return 1 iff abbr is in line but not inside braces or quotes
	472	# assumes that abbr appears only once on the line (out of braces and quotes)
	473	#
	474	def verify_out_of_braces(line, abbr):
	475
	476	phrase_split = delimiter_rex.split(line)
	477
	478	abbr_rex = re.compile( '\\b' + abbr + '\\b', re.I)
	479
	480	open_brace = 0
	481	open_quote = 0
	482
	483	for phrase in phrase_split:
	484	if phrase == "{":
	485	open_brace = open_brace + 1
	486	elif phrase == "}":
	487	open_brace = open_brace - 1
	488	elif phrase == '"':
	489	if open_quote == 1:
	490	open_quote = 0
	491	else:
	492	open_quote = 1
	493	elif abbr_rex.search(phrase):
	494	if open_brace == 0 and open_quote == 0:
	495	return 1
	496
	497	return 0
	498
	499
	500	#
	501	# a line in the form phrase1 # phrase2 # ... # phrasen
	502	# is returned as phrase1 phrase2 ... phrasen
	503	# with the correct punctuation
	504	# Bug: Doesn't always work with multiple abbreviations plugged in
	505	#
	506	def concat_line(line):
	507	# only look at part after equals
	508	field = field_rex.sub('\g<1>',line)
	509	rest = field_rex.sub('\g<2>',line)
	510
	511	concat_line = field + ' ='
	512
	513	pound_split = concatsplit_rex.split(rest)
	514
	515	phrase_count = 0
	516	length = len(pound_split)
	517
	518	for phrase in pound_split:
	519	phrase = phrase.strip()
	520	if phrase_count != 0:
	521	if phrase.startswith('"') or phrase.startswith('{'):
	522	phrase = phrase[1:]
	523	elif phrase.startswith('"'):
	524	phrase = phrase.replace('"','{',1)
	525
	526	if phrase_count != length-1:
	527	if phrase.endswith('"') or phrase.endswith('}'):
	528	phrase = phrase[:-1]
	529	else:
	530	if phrase.endswith('"'):
	531	phrase = phrase[:-1]
	532	phrase = phrase + "}"
	533	elif phrase.endswith('",'):
	534	phrase = phrase[:-2]
	535	phrase = phrase + "},"
	536
	537	# if phrase did have \#, add the \# back
	538	if phrase.endswith('\\'):
	539	phrase = phrase + "#"
	540	concat_line = concat_line + ' ' + phrase
	541
	542	phrase_count = phrase_count + 1
	543
	544	return concat_line
	545
	546
	547	#
	548	# substitute abbreviations into filecont
	549	# @param filecont_source - string of data from file
	550	#
	551	def bibtex_replace_abbreviations(filecont_source):
	552	filecont = filecont_source.splitlines()
	553
	554	# These are defined in bibtex, so we'll define them too
	555	abbr_list = ['jan','feb','mar','apr','may','jun',
	556	'jul','aug','sep','oct','nov','dec']
	557	value_list = ['January','February','March','April',
	558	'May','June','July','August','September',
	559	'October','November','December']
	560
	561	abbr_rex = []
	562	total_abbr_count = 0
	563
	564	front = '\\b'
	565	back = '(,?)\\b'
	566
	567	for x in abbr_list:
	568	abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
	569	total_abbr_count = total_abbr_count + 1
	570
	571
	572	abbrdef_rex = re.compile('\s@string\s{\s('+ valid_name_chars +')\s=(.)',
	573	re.I)
	574
	575	comment_rex = re.compile('@comment\s*{',re.I)
	576	preamble_rex = re.compile('@preamble\s*{',re.I)
	577
	578	waiting_for_end_string = 0
	579	i = 0
	580	filecont2 = ''
	581
	582	for line in filecont:
	583	if line == ' ' or line == '':
	584	continue
	585
	586	if waiting_for_end_string:
	587	if re.search('}',line):
	588	waiting_for_end_string = 0
	589	continue
	590
	591	if abbrdef_rex.search(line):
	592	abbr = abbrdef_rex.sub('\g<1>', line)
	593
	594	if abbr_list.count(abbr) == 0:
	595	val = abbrdef_rex.sub('\g<2>', line)
	596	abbr_list.append(abbr)
	597	value_list.append(string.strip(val))
	598	abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
	599	total_abbr_count = total_abbr_count + 1
	600	waiting_for_end_string = 1
	601	continue
	602
	603	if comment_rex.search(line):
	604	waiting_for_end_string = 1
	605	continue
	606
	607	if preamble_rex.search(line):
	608	waiting_for_end_string = 1
	609	continue
	610
	611
	612	# replace subsequent abbreviations with the value
	613	abbr_count = 0
	614
	615	for x in abbr_list:
	616
	617	if abbr_rex[abbr_count].search(line):
	618	if verify_out_of_braces(line,abbr_list[abbr_count]) == 1:
	619	line = abbr_rex[abbr_count].sub( value_list[abbr_count] + '\g<1>', line)
	620	# Check for # concatenations
	621	if concatsplit_rex.search(line):
	622	line = concat_line(line)
	623	abbr_count = abbr_count + 1
	624
	625
	626	filecont2 = filecont2 + line + '\n'
	627	i = i+1
	628
	629
	630	# Do one final pass over file
	631
	632	# make sure that didn't end up with {" or }" after the substitution
	633	filecont2 = filecont2.replace('{"','{{')
	634	filecont2 = filecont2.replace('"}','}}')
	635
	636	afterquotevalue_rex = re.compile('"\s,\s')
	637	afterbrace_rex = re.compile('"\s*}')
	638	afterbracevalue_rex = re.compile('(=\s{[^=])},\s*')
	639
	640	# add new lines to data that changed because of abbreviation substitutions
	641	filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
	642	filecont2 = afterbrace_rex.sub('"\n}', filecont2)
	643	filecont2 = afterbracevalue_rex.sub('\g<1>},\n', filecont2)
	644
	645	return filecont2
	646
	647	#
	648	# convert @type( ... ) to @type{ ... }
	649	#
	650	def no_outer_parens(filecont):
	651
	652	# do checking for open parens
	653	# will convert to braces
	654	paren_split = re.split('([(){}])',filecont)
	655
	656	open_paren_count = 0
	657	open_type = 0
	658	look_next = 0
	659
	660	# rebuild filecont
	661	filecont = ''
	662
	663	at_rex = re.compile('@\w*')
	664
	665	for phrase in paren_split:
	666	if look_next == 1:
	667	if phrase == '(':
	668	phrase = '{'
	669	open_paren_count = open_paren_count + 1
	670	else:
	671	open_type = 0
	672	look_next = 0
	673
	674	if phrase == '(':
	675	open_paren_count = open_paren_count + 1
	676
	677	elif phrase == ')':
	678	open_paren_count = open_paren_count - 1
	679	if open_type == 1 and open_paren_count == 0:
	680	phrase = '}'
	681	open_type = 0
	682
	683	elif at_rex.search( phrase ):
	684	open_type = 1
	685	look_next = 1
	686
	687	filecont = filecont + phrase
	688
	689	return filecont
	690
	691
	692	#
	693	# make all whitespace into just one space
	694	# format the bibtex file into a usable form.
	695	#
	696	def bibtexwasher(filecont_source):
	697
	698	space_rex = re.compile('\s+')
	699	comment_rex = re.compile('\s*%')
	700
	701	filecont = []
	702
	703	# remove trailing and excessive whitespace
	704	# ignore comments
	705	for line in filecont_source:
	706	line = string.strip(line)
	707	line = space_rex.sub(' ', line)
	708	# ignore comments
	709	if not comment_rex.match(line) and line != '':
	710	filecont.append(' '+ line)
	711
	712	filecont = string.join(filecont, '')
	713
	714	# the file is in one long string
	715
	716	filecont = no_outer_parens(filecont)
	717
	718	#
	719	# split lines according to preferred syntax scheme
	720	#
	721	filecont = re.sub('(=\s{[^=])},', '\g<1>},\n', filecont)
	722
	723	# add new lines after commas that are after values
	724	filecont = re.sub('"\s*,', '",\n', filecont)
	725	filecont = re.sub('=\s([\w\d]+)\s,', '= \g<1>,\n', filecont)
	726	filecont = re.sub('(@\w)\s({(\s)[^,\s])\s*,',
	727	'\n\n\g<1>\g<2>,\n', filecont)
	728
	729	# add new lines after }
	730	filecont = re.sub('"\s*}','"\n}\n', filecont)
	731	filecont = re.sub('}\s*,','},\n', filecont)
	732
	733
	734	filecont = re.sub('@(\w*)', '\n@\g<1>', filecont)
	735
	736	# character encoding, reserved latex characters
	737	filecont = re.sub('{\\\&}', '&', filecont)
	738	filecont = re.sub('\\\&', '&', filecont)
	739
	740	# do checking for open braces to get format correct
	741	open_brace_count = 0
	742	brace_split = re.split('([{}])',filecont)
	743
	744	# rebuild filecont
	745	filecont = ''
	746
	747	for phrase in brace_split:
	748	if phrase == '{':
	749	open_brace_count = open_brace_count + 1
	750	elif phrase == '}':
	751	open_brace_count = open_brace_count - 1
	752	if open_brace_count == 0:
	753	filecont = filecont + '\n'
	754
	755	filecont = filecont + phrase
	756
	757	filecont2 = bibtex_replace_abbreviations(filecont)
	758
	759	# gather
	760	filecont = filecont2.splitlines()
	761	i=0
	762	j=0 # count the number of blank lines
	763	for line in filecont:
	764	# ignore blank lines
	765	if line == '' or line == ' ':
	766	j = j+1
	767	continue
	768	filecont[i] = line + '\n'
	769	i = i+1
	770
	771	# get rid of the extra stuff at the end of the array
	772	# (The extra stuff are duplicates that are in the array because
	773	# blank lines were removed.)
	774	length = len( filecont)
	775	filecont[length-j:length] = []
	776
	777	return filecont
	778
	779
	780	def filehandler(filepath):
	781	try:
	782	fd = open(filepath, 'r')
	783	filecont_source = fd.readlines()
	784	fd.close()
	785	except:
	786	print 'Could not open file:', filepath
	787	washeddata = bibtexwasher(filecont_source)
	788	outdata = bibtexdecoder(washeddata)
	789	print '/**'
	790	print '\page references References'
	791	print
	792	for line in outdata:
	793	print line
	794	print '*/'
	795
	796
	797	# main program
	798
	799	def main():
	800	import sys
	801	if sys.argv[1:]:
	802	filepath = sys.argv[1]
	803	else:
	804	print "No input file"
	805	sys.exit()
	806	filehandler(filepath)
	807
	808	if __name__ == "__main__": main()
	809
	810
	811	# end python script

Note: See TracBrowser for help on using the repository browser.

Download in other formats: