COIN-OR::LEMON - Graph Library

Context Navigation

source: lemon-main/scripts/bib2dox.py @ 841:aa8c9008b3de

Last change on this file since 841:aa8c9008b3de was 836:c841ae1aca29, checked in by Peter Kovacs <kpeter@…>, 15 years ago
Modify the header of scripts/bib2dox.py (#184)
Property exe set to ``*
File size: 25.7 KB

Rev	Line
[836]	1	#! /usr/bin/env python
[743]	2	"""
	3	BibTeX to Doxygen converter
	4	Usage: python bib2dox.py bibfile.bib > bibfile.dox
	5
[836]	6	This file is a part of LEMON, a generic C++ optimization library.
	7
	8	**********************************************************************
	9
[743]	10	This code is the modification of the BibTeX to XML converter
[836]	11	by Vidar Bronken Gundersen et al.
	12	See the original copyright notices below.
[743]	13
	14	**********************************************************************
	15
	16	Decoder for bibliographic data, BibTeX
	17	Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
	18
	19	v.8
	20	(c)2002-06-23 Vidar Bronken Gundersen
	21	http://bibtexml.sf.net/
	22	Reuse approved as long as this notification is kept.
	23	Licence: GPL.
	24
	25	Contributions/thanks to:
	26	Egon Willighagen, http://sf.net/projects/jreferences/
	27	Richard Mahoney (for providing a test case)
	28
	29	Edited by Sara Sprenkle to be more robust and handle more bibtex features.
	30	(c) 2003-01-15
	31
	32	1. Changed bibtex: tags to bibxml: tags.
	33	2. Use xmlns:bibxml="http://bibtexml.sf.net/"
	34	3. Allow spaces between @type and first {
	35	4. "author" fields with multiple authors split by " and "
	36	are put in separate xml "bibxml:author" tags.
	37	5. Option for Titles: words are capitalized
	38	only if first letter in title or capitalized inside braces
	39	6. Removes braces from within field values
	40	7. Ignores comments in bibtex file (including @comment{ or % )
	41	8. Replaces some special latex tags, e.g., replaces ~ with ' '
	42	9. Handles bibtex @string abbreviations
	43	--> includes bibtex's default abbreviations for months
	44	--> does concatenation of abbr # " more " and " more " # abbr
	45	10. Handles @type( ... ) or @type{ ... }
	46	11. The keywords field is split on , or ; and put into separate xml
	47	"bibxml:keywords" tags
	48	12. Ignores @preamble
	49
	50	Known Limitations
	51	1. Does not transform Latex encoding like math mode and special
	52	latex symbols.
	53	2. Does not parse author fields into first and last names.
	54	E.g., It does not do anything special to an author whose name is
	55	in the form LAST_NAME, FIRST_NAME
	56	In "author" tag, will show up as
	57	<bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
	58	3. Does not handle "crossref" fields other than to print
	59	<bibxml:crossref>...</bibxml:crossref>
	60	4. Does not inform user of the input's format errors. You just won't
	61	be able to transform the file later with XSL
	62
	63	You will have to manually edit the XML output if you need to handle
	64	these (and unknown) limitations.
	65
	66	"""
	67
	68	import string, re
	69
	70	# set of valid name characters
	71	valid_name_chars = '[\w\-:]'
	72
	73	#
	74	# define global regular expression variables
	75	#
	76	author_rex = re.compile('\s+and\s+')
	77	rembraces_rex = re.compile('[{}]')
[754]	78	capitalize_rex = re.compile('({[^}]*})')
[743]	79
	80	# used by bibtexkeywords(data)
	81	keywords_rex = re.compile('[,;]')
	82
	83	# used by concat_line(line)
	84	concatsplit_rex = re.compile('\s#\s')
	85
	86	# split on {, }, or " in verify_out_of_braces
	87	delimiter_rex = re.compile('([{}"])',re.I)
	88
	89	field_rex = re.compile('\s(\w)\s=\s(.*)')
	90	data_rex = re.compile('\s(\w)\s=\s([^,]*),?')
	91
	92	url_rex = re.compile('\\\url\{([^}]*)\}')
	93
[745]	94	#
	95	# styles for html formatting
	96	#
	97	divstyle = 'margin-top: -4ex; margin-left: 8em;'
[743]	98
	99	#
	100	# return the string parameter with \url{...} commands turned into HTML links
	101	#
	102	def transformurls(str):
	103	return url_rex.sub(r'<a href="\1">\1</a>', str)
	104
	105	#
	106	# return the string parameter without braces
	107	#
	108	def removebraces(str):
	109	return rembraces_rex.sub('', str)
	110
	111	#
	112	# latex-specific replacements
	113	# (do this after braces were removed)
	114	#
	115	def latexreplacements(line):
	116	line = string.replace(line, '~', ' ')
	117	line = string.replace(line, '\\\'a', 'á')
	118	line = string.replace(line, '\\"a', 'ä')
	119	line = string.replace(line, '\\\'e', 'é')
	120	line = string.replace(line, '\\"e', 'ë')
	121	line = string.replace(line, '\\\'i', 'í')
	122	line = string.replace(line, '\\"i', 'ï')
	123	line = string.replace(line, '\\\'o', 'ó')
	124	line = string.replace(line, '\\"o', 'ö')
	125	line = string.replace(line, '\\\'u', 'ú')
	126	line = string.replace(line, '\\"u', 'ü')
	127	line = string.replace(line, '\\H o', 'õ')
	128	line = string.replace(line, '\\H u', 'ü') # &utilde; does not exist
	129	line = string.replace(line, '\\\'A', 'Á')
	130	line = string.replace(line, '\\"A', 'Ä')
	131	line = string.replace(line, '\\\'E', 'É')
	132	line = string.replace(line, '\\"E', 'Ë')
	133	line = string.replace(line, '\\\'I', 'Í')
	134	line = string.replace(line, '\\"I', 'Ï')
	135	line = string.replace(line, '\\\'O', 'Ó')
	136	line = string.replace(line, '\\"O', 'Ö')
	137	line = string.replace(line, '\\\'U', 'Ú')
	138	line = string.replace(line, '\\"U', 'Ü')
	139	line = string.replace(line, '\\H O', 'Õ')
	140	line = string.replace(line, '\\H U', 'Ü') # &Utilde; does not exist
	141
	142	return line
	143
	144	#
	145	# copy characters from a string decoding html expressions (&xyz;)
	146	#
	147	def copychars(str, ifrom, count):
	148	result = ''
	149	i = ifrom
	150	c = 0
	151	html_spec = False
	152	while (i < len(str)) and (c < count):
	153	if str[i] == '&':
	154	html_spec = True;
	155	if i+1 < len(str):
	156	result += str[i+1]
	157	c += 1
	158	i += 2
	159	else:
	160	if not html_spec:
	161	if ((str[i] >= 'A') and (str[i] <= 'Z')) or \
	162	((str[i] >= 'a') and (str[i] <= 'z')):
	163	result += str[i]
	164	c += 1
	165	elif str[i] == ';':
	166	html_spec = False;
	167	i += 1
	168
	169	return result
	170
	171
	172	#
	173	# Handle a list of authors (separated by 'and').
	174	# It gives back an array of the following values:
	175	# - num: the number of authors,
	176	# - list: the list of the author names,
	177	# - text: the bibtex text (separated by commas and/or 'and')
	178	# - abbrev: abbreviation that can be used to indicate the
	179	# bibliography entries
	180	#
	181	def bibtexauthor(data):
	182	result = {}
	183	bibtex = ''
	184	result['list'] = author_rex.split(data)
	185	result['num'] = len(result['list'])
	186	for i, author in enumerate(result['list']):
	187	# general transformations
	188	author = latexreplacements(removebraces(author.strip()))
	189	# transform "Xyz, A. B." to "A. B. Xyz"
	190	pos = author.find(',')
	191	if pos != -1:
	192	author = author[pos+1:].strip() + ' ' + author[:pos].strip()
	193	result['list'][i] = author
	194	bibtex += author + '#'
	195	bibtex = bibtex[:-1]
	196	if result['num'] > 1:
	197	ix = bibtex.rfind('#')
	198	if result['num'] == 2:
	199	bibtex = bibtex[:ix] + ' and ' + bibtex[ix+1:]
	200	else:
	201	bibtex = bibtex[:ix] + ', and ' + bibtex[ix+1:]
	202	bibtex = bibtex.replace('#', ', ')
	203	result['text'] = bibtex
	204
	205	result['abbrev'] = ''
	206	for author in result['list']:
	207	pos = author.rfind(' ') + 1
	208	count = 1
	209	if result['num'] == 1:
	210	count = 3
	211	result['abbrev'] += copychars(author, pos, count)
	212
	213	return result
	214
	215
	216	#
	217	# data = title string
	218	# @return the capitalized title (first letter is capitalized), rest are capitalized
	219	# only if capitalized inside braces
	220	#
	221	def capitalizetitle(data):
	222	title_list = capitalize_rex.split(data)
	223	title = ''
	224	count = 0
	225	for phrase in title_list:
	226	check = string.lstrip(phrase)
	227
	228	# keep phrase's capitalization the same
	229	if check.find('{') == 0:
	230	title += removebraces(phrase)
	231	else:
	232	# first word --> capitalize first letter (after spaces)
	233	if count == 0:
	234	title += check.capitalize()
	235	else:
	236	title += phrase.lower()
	237	count = count + 1
	238
	239	return title
	240
	241
	242	#
	243	# @return the bibtex for the title
	244	# @param data --> title string
	245	# braces are removed from title
	246	#
	247	def bibtextitle(data, entrytype):
	248	if entrytype in ('book', 'inbook'):
	249	title = removebraces(data.strip())
	250	else:
	251	title = removebraces(capitalizetitle(data.strip()))
	252	bibtex = title
	253	return bibtex
	254
	255
	256	#
	257	# function to compare entry lists
	258	#
	259	def entry_cmp(x, y):
	260	return cmp(x[0], y[0])
	261
	262
	263	#
	264	# print the XML for the transformed "filecont_source"
	265	#
	266	def bibtexdecoder(filecont_source):
	267	filecont = []
	268	file = []
	269
	270	# want @<alphanumeric chars><spaces>{<spaces><any chars>,
	271	pubtype_rex = re.compile('@(\w)\s{\s(.),')
	272	endtype_rex = re.compile('}\s*$')
	273	endtag_rex = re.compile('^\s}\s$')
	274
	275	bracefield_rex = re.compile('\s(\w)\s=\s(.*)')
	276	bracedata_rex = re.compile('\s(\w)\s=\s{(.*)},?')
	277
	278	quotefield_rex = re.compile('\s(\w)\s=\s(.*)')
	279	quotedata_rex = re.compile('\s(\w)\s=\s"(.*)",?')
	280
	281	for line in filecont_source:
	282	line = line[:-1]
	283
	284	# encode character entities
	285	line = string.replace(line, '&', '&')
	286	line = string.replace(line, '<', '<')
	287	line = string.replace(line, '>', '>')
	288
	289	# start entry: publication type (store for later use)
	290	if pubtype_rex.match(line):
	291	# want @<alphanumeric chars><spaces>{<spaces><any chars>,
	292	entrycont = {}
	293	entry = []
	294	entrytype = pubtype_rex.sub('\g<1>',line)
	295	entrytype = string.lower(entrytype)
[745]	296	entryid = pubtype_rex.sub('\g<2>', line)
[743]	297
	298	# end entry if just a }
	299	elif endtype_rex.match(line):
	300	# generate doxygen code for the entry
	301
	302	# enty type related formattings
	303	if entrytype in ('book', 'inbook'):
	304	entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
	305	if not entrycont.has_key('author'):
	306	entrycont['author'] = entrycont['editor']
	307	entrycont['author']['text'] += ', editors'
	308	elif entrytype == 'article':
	309	entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
	310	elif entrytype in ('inproceedings', 'incollection', 'conference'):
	311	entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
	312	elif entrytype == 'techreport':
	313	if not entrycont.has_key('type'):
	314	entrycont['type'] = 'Technical report'
	315	elif entrytype == 'mastersthesis':
	316	entrycont['type'] = 'Master\'s thesis'
	317	elif entrytype == 'phdthesis':
	318	entrycont['type'] = 'PhD thesis'
	319
	320	for eline in entrycont:
	321	if eline != '':
	322	eline = latexreplacements(eline)
	323
	324	if entrycont.has_key('pages') and (entrycont['pages'] != ''):
	325	entrycont['pages'] = string.replace(entrycont['pages'], '--', '-')
	326
	327	if entrycont.has_key('author') and (entrycont['author'] != ''):
	328	entry.append(entrycont['author']['text'] + '.')
	329	if entrycont.has_key('title') and (entrycont['title'] != ''):
	330	entry.append(entrycont['title'] + '.')
	331	if entrycont.has_key('journal') and (entrycont['journal'] != ''):
	332	entry.append(entrycont['journal'] + ',')
	333	if entrycont.has_key('booktitle') and (entrycont['booktitle'] != ''):
	334	entry.append('In ' + entrycont['booktitle'] + ',')
	335	if entrycont.has_key('type') and (entrycont['type'] != ''):
	336	eline = entrycont['type']
	337	if entrycont.has_key('number') and (entrycont['number'] != ''):
	338	eline += ' ' + entrycont['number']
	339	eline += ','
	340	entry.append(eline)
	341	if entrycont.has_key('institution') and (entrycont['institution'] != ''):
	342	entry.append(entrycont['institution'] + ',')
	343	if entrycont.has_key('publisher') and (entrycont['publisher'] != ''):
	344	entry.append(entrycont['publisher'] + ',')
	345	if entrycont.has_key('school') and (entrycont['school'] != ''):
	346	entry.append(entrycont['school'] + ',')
	347	if entrycont.has_key('address') and (entrycont['address'] != ''):
	348	entry.append(entrycont['address'] + ',')
	349	if entrycont.has_key('edition') and (entrycont['edition'] != ''):
	350	entry.append(entrycont['edition'] + ' edition,')
	351	if entrycont.has_key('howpublished') and (entrycont['howpublished'] != ''):
	352	entry.append(entrycont['howpublished'] + ',')
	353	if entrycont.has_key('volume') and (entrycont['volume'] != ''):
	354	eline = entrycont['volume'];
	355	if entrycont.has_key('number') and (entrycont['number'] != ''):
	356	eline += '(' + entrycont['number'] + ')'
	357	if entrycont.has_key('pages') and (entrycont['pages'] != ''):
	358	eline += ':' + entrycont['pages']
	359	eline += ','
	360	entry.append(eline)
	361	else:
	362	if entrycont.has_key('pages') and (entrycont['pages'] != ''):
	363	entry.append('pages ' + entrycont['pages'] + ',')
	364	if entrycont.has_key('year') and (entrycont['year'] != ''):
	365	if entrycont.has_key('month') and (entrycont['month'] != ''):
	366	entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
	367	else:
	368	entry.append(entrycont['year'] + '.')
	369	if entrycont.has_key('note') and (entrycont['note'] != ''):
	370	entry.append(entrycont['note'] + '.')
[754]	371	if entrycont.has_key('url') and (entrycont['url'] != ''):
	372	entry.append(entrycont['url'] + '.')
[743]	373
	374	# generate keys for sorting and for the output
	375	sortkey = ''
	376	bibkey = ''
	377	if entrycont.has_key('author'):
	378	for author in entrycont['author']['list']:
	379	sortkey += copychars(author, author.rfind(' ')+1, len(author))
	380	bibkey = entrycont['author']['abbrev']
	381	else:
	382	bibkey = 'x'
	383	if entrycont.has_key('year'):
	384	sortkey += entrycont['year']
	385	bibkey += entrycont['year'][-2:]
	386	if entrycont.has_key('title'):
	387	sortkey += entrycont['title']
	388	if entrycont.has_key('key'):
	389	sortkey = entrycont['key'] + sortkey
	390	bibkey = entrycont['key']
	391	entry.insert(0, sortkey)
	392	entry.insert(1, bibkey)
[745]	393	entry.insert(2, entryid)
[743]	394
	395	# add the entry to the file contents
	396	filecont.append(entry)
	397
	398	else:
	399	# field, publication info
	400	field = ''
	401	data = ''
	402
	403	# field = {data} entries
	404	if bracedata_rex.match(line):
	405	field = bracefield_rex.sub('\g<1>', line)
	406	field = string.lower(field)
	407	data = bracedata_rex.sub('\g<2>', line)
	408
	409	# field = "data" entries
	410	elif quotedata_rex.match(line):
	411	field = quotefield_rex.sub('\g<1>', line)
	412	field = string.lower(field)
	413	data = quotedata_rex.sub('\g<2>', line)
	414
	415	# field = data entries
	416	elif data_rex.match(line):
	417	field = field_rex.sub('\g<1>', line)
	418	field = string.lower(field)
	419	data = data_rex.sub('\g<2>', line)
[754]	420
	421	if field == 'url':
	422	data = '\\url{' + data.strip() + '}'
[743]	423
	424	if field in ('author', 'editor'):
	425	entrycont[field] = bibtexauthor(data)
	426	line = ''
	427	elif field == 'title':
	428	line = bibtextitle(data, entrytype)
	429	elif field != '':
	430	line = removebraces(transformurls(data.strip()))
	431
	432	if line != '':
	433	line = latexreplacements(line)
	434	entrycont[field] = line
	435
	436
	437	# sort entries
	438	filecont.sort(entry_cmp)
	439
	440	# count the bibtex keys
	441	keytable = {}
	442	counttable = {}
	443	for entry in filecont:
	444	bibkey = entry[1]
	445	if not keytable.has_key(bibkey):
	446	keytable[bibkey] = 1
	447	else:
	448	keytable[bibkey] += 1
	449
	450	for bibkey in keytable.keys():
	451	counttable[bibkey] = 0
	452
	453	# generate output
	454	for entry in filecont:
	455	# generate output key form the bibtex key
	456	bibkey = entry[1]
[745]	457	entryid = entry[2]
[743]	458	if keytable[bibkey] == 1:
	459	outkey = bibkey
	460	else:
	461	outkey = bibkey + chr(97 + counttable[bibkey])
	462	counttable[bibkey] += 1
	463
	464	# append the entry code to the output
[745]	465	file.append('\\section ' + entryid + ' [' + outkey + ']')
	466	file.append('<div style="' + divstyle + '">')
	467	for line in entry[3:]:
[743]	468	file.append(line)
[745]	469	file.append('</div>')
[743]	470	file.append('')
	471
	472	return file
	473
	474
	475	#
	476	# return 1 iff abbr is in line but not inside braces or quotes
	477	# assumes that abbr appears only once on the line (out of braces and quotes)
	478	#
	479	def verify_out_of_braces(line, abbr):
	480
	481	phrase_split = delimiter_rex.split(line)
	482
	483	abbr_rex = re.compile( '\\b' + abbr + '\\b', re.I)
	484
	485	open_brace = 0
	486	open_quote = 0
	487
	488	for phrase in phrase_split:
	489	if phrase == "{":
	490	open_brace = open_brace + 1
	491	elif phrase == "}":
	492	open_brace = open_brace - 1
	493	elif phrase == '"':
	494	if open_quote == 1:
	495	open_quote = 0
	496	else:
	497	open_quote = 1
	498	elif abbr_rex.search(phrase):
	499	if open_brace == 0 and open_quote == 0:
	500	return 1
	501
	502	return 0
	503
	504
	505	#
	506	# a line in the form phrase1 # phrase2 # ... # phrasen
	507	# is returned as phrase1 phrase2 ... phrasen
	508	# with the correct punctuation
	509	# Bug: Doesn't always work with multiple abbreviations plugged in
	510	#
	511	def concat_line(line):
	512	# only look at part after equals
	513	field = field_rex.sub('\g<1>',line)
	514	rest = field_rex.sub('\g<2>',line)
	515
	516	concat_line = field + ' ='
	517
	518	pound_split = concatsplit_rex.split(rest)
	519
	520	phrase_count = 0
	521	length = len(pound_split)
	522
	523	for phrase in pound_split:
	524	phrase = phrase.strip()
	525	if phrase_count != 0:
	526	if phrase.startswith('"') or phrase.startswith('{'):
	527	phrase = phrase[1:]
	528	elif phrase.startswith('"'):
	529	phrase = phrase.replace('"','{',1)
	530
	531	if phrase_count != length-1:
	532	if phrase.endswith('"') or phrase.endswith('}'):
	533	phrase = phrase[:-1]
	534	else:
	535	if phrase.endswith('"'):
	536	phrase = phrase[:-1]
	537	phrase = phrase + "}"
	538	elif phrase.endswith('",'):
	539	phrase = phrase[:-2]
	540	phrase = phrase + "},"
	541
	542	# if phrase did have \#, add the \# back
	543	if phrase.endswith('\\'):
	544	phrase = phrase + "#"
	545	concat_line = concat_line + ' ' + phrase
	546
	547	phrase_count = phrase_count + 1
	548
	549	return concat_line
	550
	551
	552	#
	553	# substitute abbreviations into filecont
	554	# @param filecont_source - string of data from file
	555	#
	556	def bibtex_replace_abbreviations(filecont_source):
	557	filecont = filecont_source.splitlines()
	558
	559	# These are defined in bibtex, so we'll define them too
	560	abbr_list = ['jan','feb','mar','apr','may','jun',
	561	'jul','aug','sep','oct','nov','dec']
	562	value_list = ['January','February','March','April',
	563	'May','June','July','August','September',
	564	'October','November','December']
	565
	566	abbr_rex = []
	567	total_abbr_count = 0
	568
	569	front = '\\b'
	570	back = '(,?)\\b'
	571
	572	for x in abbr_list:
	573	abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
	574	total_abbr_count = total_abbr_count + 1
	575
	576
	577	abbrdef_rex = re.compile('\s@string\s{\s('+ valid_name_chars +')\s=(.)',
	578	re.I)
	579
	580	comment_rex = re.compile('@comment\s*{',re.I)
	581	preamble_rex = re.compile('@preamble\s*{',re.I)
	582
	583	waiting_for_end_string = 0
	584	i = 0
	585	filecont2 = ''
	586
	587	for line in filecont:
	588	if line == ' ' or line == '':
	589	continue
	590
	591	if waiting_for_end_string:
	592	if re.search('}',line):
	593	waiting_for_end_string = 0
	594	continue
	595
	596	if abbrdef_rex.search(line):
	597	abbr = abbrdef_rex.sub('\g<1>', line)
	598
	599	if abbr_list.count(abbr) == 0:
	600	val = abbrdef_rex.sub('\g<2>', line)
	601	abbr_list.append(abbr)
	602	value_list.append(string.strip(val))
	603	abbr_rex.append( re.compile( front + abbr_list[total_abbr_count] + back, re.I ) )
	604	total_abbr_count = total_abbr_count + 1
	605	waiting_for_end_string = 1
	606	continue
	607
	608	if comment_rex.search(line):
	609	waiting_for_end_string = 1
	610	continue
	611
	612	if preamble_rex.search(line):
	613	waiting_for_end_string = 1
	614	continue
	615
	616
	617	# replace subsequent abbreviations with the value
	618	abbr_count = 0
	619
	620	for x in abbr_list:
	621
	622	if abbr_rex[abbr_count].search(line):
	623	if verify_out_of_braces(line,abbr_list[abbr_count]) == 1:
	624	line = abbr_rex[abbr_count].sub( value_list[abbr_count] + '\g<1>', line)
	625	# Check for # concatenations
	626	if concatsplit_rex.search(line):
	627	line = concat_line(line)
	628	abbr_count = abbr_count + 1
	629
	630
	631	filecont2 = filecont2 + line + '\n'
	632	i = i+1
	633
	634
	635	# Do one final pass over file
	636
	637	# make sure that didn't end up with {" or }" after the substitution
	638	filecont2 = filecont2.replace('{"','{{')
	639	filecont2 = filecont2.replace('"}','}}')
	640
	641	afterquotevalue_rex = re.compile('"\s,\s')
	642	afterbrace_rex = re.compile('"\s*}')
	643	afterbracevalue_rex = re.compile('(=\s{[^=])},\s*')
	644
	645	# add new lines to data that changed because of abbreviation substitutions
	646	filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
	647	filecont2 = afterbrace_rex.sub('"\n}', filecont2)
	648	filecont2 = afterbracevalue_rex.sub('\g<1>},\n', filecont2)
	649
	650	return filecont2
	651
	652	#
	653	# convert @type( ... ) to @type{ ... }
	654	#
	655	def no_outer_parens(filecont):
	656
	657	# do checking for open parens
	658	# will convert to braces
	659	paren_split = re.split('([(){}])',filecont)
	660
	661	open_paren_count = 0
	662	open_type = 0
	663	look_next = 0
	664
	665	# rebuild filecont
	666	filecont = ''
	667
	668	at_rex = re.compile('@\w*')
	669
	670	for phrase in paren_split:
	671	if look_next == 1:
	672	if phrase == '(':
	673	phrase = '{'
	674	open_paren_count = open_paren_count + 1
	675	else:
	676	open_type = 0
	677	look_next = 0
	678
	679	if phrase == '(':
	680	open_paren_count = open_paren_count + 1
	681
	682	elif phrase == ')':
	683	open_paren_count = open_paren_count - 1
	684	if open_type == 1 and open_paren_count == 0:
	685	phrase = '}'
	686	open_type = 0
	687
	688	elif at_rex.search( phrase ):
	689	open_type = 1
	690	look_next = 1
	691
	692	filecont = filecont + phrase
	693
	694	return filecont
	695
	696
	697	#
	698	# make all whitespace into just one space
	699	# format the bibtex file into a usable form.
	700	#
	701	def bibtexwasher(filecont_source):
	702
	703	space_rex = re.compile('\s+')
	704	comment_rex = re.compile('\s*%')
	705
	706	filecont = []
	707
	708	# remove trailing and excessive whitespace
	709	# ignore comments
	710	for line in filecont_source:
	711	line = string.strip(line)
	712	line = space_rex.sub(' ', line)
	713	# ignore comments
	714	if not comment_rex.match(line) and line != '':
	715	filecont.append(' '+ line)
	716
	717	filecont = string.join(filecont, '')
	718
	719	# the file is in one long string
	720
	721	filecont = no_outer_parens(filecont)
	722
	723	#
	724	# split lines according to preferred syntax scheme
	725	#
	726	filecont = re.sub('(=\s{[^=])},', '\g<1>},\n', filecont)
	727
	728	# add new lines after commas that are after values
	729	filecont = re.sub('"\s*,', '",\n', filecont)
	730	filecont = re.sub('=\s([\w\d]+)\s,', '= \g<1>,\n', filecont)
	731	filecont = re.sub('(@\w)\s({(\s)[^,\s])\s*,',
	732	'\n\n\g<1>\g<2>,\n', filecont)
	733
	734	# add new lines after }
	735	filecont = re.sub('"\s*}','"\n}\n', filecont)
	736	filecont = re.sub('}\s*,','},\n', filecont)
	737
	738
	739	filecont = re.sub('@(\w*)', '\n@\g<1>', filecont)
	740
	741	# character encoding, reserved latex characters
	742	filecont = re.sub('{\\\&}', '&', filecont)
	743	filecont = re.sub('\\\&', '&', filecont)
	744
	745	# do checking for open braces to get format correct
	746	open_brace_count = 0
	747	brace_split = re.split('([{}])',filecont)
	748
	749	# rebuild filecont
	750	filecont = ''
	751
	752	for phrase in brace_split:
	753	if phrase == '{':
	754	open_brace_count = open_brace_count + 1
	755	elif phrase == '}':
	756	open_brace_count = open_brace_count - 1
	757	if open_brace_count == 0:
	758	filecont = filecont + '\n'
	759
	760	filecont = filecont + phrase
	761
	762	filecont2 = bibtex_replace_abbreviations(filecont)
	763
	764	# gather
	765	filecont = filecont2.splitlines()
	766	i=0
	767	j=0 # count the number of blank lines
	768	for line in filecont:
	769	# ignore blank lines
	770	if line == '' or line == ' ':
	771	j = j+1
	772	continue
	773	filecont[i] = line + '\n'
	774	i = i+1
	775
	776	# get rid of the extra stuff at the end of the array
	777	# (The extra stuff are duplicates that are in the array because
	778	# blank lines were removed.)
	779	length = len( filecont)
	780	filecont[length-j:length] = []
	781
	782	return filecont
	783
	784
	785	def filehandler(filepath):
	786	try:
	787	fd = open(filepath, 'r')
	788	filecont_source = fd.readlines()
	789	fd.close()
	790	except:
	791	print 'Could not open file:', filepath
	792	washeddata = bibtexwasher(filecont_source)
	793	outdata = bibtexdecoder(washeddata)
	794	print '/**'
	795	print '\page references References'
	796	print
	797	for line in outdata:
	798	print line
	799	print '*/'
	800
	801
	802	# main program
	803
	804	def main():
	805	import sys
	806	if sys.argv[1:]:
	807	filepath = sys.argv[1]
	808	else:
	809	print "No input file"
	810	sys.exit()
	811	filehandler(filepath)
	812
	813	if __name__ == "__main__": main()
	814
	815
	816	# end python script

Note: See TracBrowser for help on using the repository browser.

Download in other formats: