COIN-OR::LEMON - Graph Library

Context Navigation

source: lemon/scripts/bib2dox.py @ 792:68792fb2870f

Last change on this file since 792:68792fb2870f was 792:68792fb2870f, checked in by Peter Kovacs <kpeter@…>, 15 years ago
Improve bib2dox.py using \section for entries (#184)
File size: 25.4 KB

Line
1	#!/usr/bin/env /usr/local/Python/bin/python2.1
2	"""
3	BibTeX to Doxygen converter
4	Usage: python bib2dox.py bibfile.bib > bibfile.dox
5
6	This code is the modification of the BibTeX to XML converter
7	by Vidar Bronken Gundersen et al. See the original copyright notices below.
8
9	**********************************************************************
10
11	Decoder for bibliographic data, BibTeX
12	Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
13
14	v.8
15	(c)2002-06-23 Vidar Bronken Gundersen
16	http://bibtexml.sf.net/
17	Reuse approved as long as this notification is kept.
18	Licence: GPL.
19
20	Contributions/thanks to:
21	Egon Willighagen, http://sf.net/projects/jreferences/
22	Richard Mahoney (for providing a test case)
23
24	Edited by Sara Sprenkle to be more robust and handle more bibtex features.
25	(c) 2003-01-15
26
27	1. Changed bibtex: tags to bibxml: tags.
28	2. Use xmlns:bibxml="http://bibtexml.sf.net/"
29	3. Allow spaces between @type and first {
30	4. "author" fields with multiple authors split by " and "
31	are put in separate xml "bibxml:author" tags.
32	5. Option for Titles: words are capitalized
33	only if first letter in title or capitalized inside braces
34	6. Removes braces from within field values
35	7. Ignores comments in bibtex file (including @comment{ or % )
36	8. Replaces some special latex tags, e.g., replaces ~ with ' '
37	9. Handles bibtex @string abbreviations
38	--> includes bibtex's default abbreviations for months
39	--> does concatenation of abbr # " more " and " more " # abbr
40	10. Handles @type( ... ) or @type{ ... }
41	11. The keywords field is split on , or ; and put into separate xml
42	"bibxml:keywords" tags
43	12. Ignores @preamble
44
45	Known Limitations
46	1. Does not transform Latex encoding like math mode and special
47	latex symbols.
48	2. Does not parse author fields into first and last names.
49	E.g., It does not do anything special to an author whose name is
50	in the form LAST_NAME, FIRST_NAME
51	In "author" tag, will show up as
52	<bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
53	3. Does not handle "crossref" fields other than to print
54	<bibxml:crossref>...</bibxml:crossref>
55	4. Does not inform user of the input's format errors. You just won't
56	be able to transform the file later with XSL
57
58	You will have to manually edit the XML output if you need to handle
59	these (and unknown) limitations.
60
61	"""
62
63	import string, re
64
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings so the backslashes reach the regex engine
# unchanged on every Python version.  Names and whitespace runs use the
# '*' quantifier: field names are longer than one character and the amount
# of whitespace around '=' and '#' is arbitrary.
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'(\{\w*\})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "<name> = <value>" lines; group 1 is the field name, group 2 the value
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# LaTeX \url{...} command; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
93
94	#
95	# return the string parameter with \url{...} commands turned into html links
96	#
def transformurls(str):
    """Return ``str`` with each match of ``url_rex`` rewritten as an HTML link.

    The captured address is used both as the ``href`` target and as the
    visible link text.
    """
    return url_rex.sub(r'<a href="\1">\1</a>', str)
99
100	#
101	# return the string parameter without braces
102	#
def removebraces(str):
    """Return ``str`` with every ``{`` and ``}`` character removed."""
    return rembraces_rex.sub('', str)
105
106	#
107	# latex-specific replacements
108	# (do this after braces were removed)
109	#
def latexreplacements(line):
    """Replace LaTeX accent commands in ``line`` by HTML character entities.

    Must be applied after the braces were removed, because the accent
    commands are matched without surrounding braces.  ``~`` (LaTeX
    non-breaking space) becomes a plain space.

    The accented characters are emitted as ``&name;`` entities, which is
    the form ``copychars`` decodes when it builds abbreviations.
    """
    replacements = (
        ('~', ' '),
        ("\\'a", '&aacute;'),
        ('\\"a', '&auml;'),
        ("\\'e", '&eacute;'),
        ('\\"e', '&euml;'),
        ("\\'i", '&iacute;'),
        ('\\"i', '&iuml;'),
        ("\\'o", '&oacute;'),
        ('\\"o', '&ouml;'),
        ("\\'u", '&uacute;'),
        ('\\"u', '&uuml;'),
        ('\\H o', '&otilde;'),
        ('\\H u', '&uuml;'),    # &utilde; does not exist
        ("\\'A", '&Aacute;'),
        ('\\"A', '&Auml;'),
        ("\\'E", '&Eacute;'),
        ('\\"E', '&Euml;'),
        ("\\'I", '&Iacute;'),
        ('\\"I', '&Iuml;'),
        ("\\'O", '&Oacute;'),
        ('\\"O', '&Ouml;'),
        ("\\'U", '&Uacute;'),
        ('\\"U', '&Uuml;'),
        ('\\H O', '&Otilde;'),
        ('\\H U', '&Uuml;'),    # &Utilde; does not exist
    )
    for latex, html in replacements:
        line = line.replace(latex, html)
    return line
138
139	#
140	# copy characters from a string, decoding html expressions (&xyz;)
141	#
def copychars(str, ifrom, count):
    """Copy up to ``count`` letters of ``str``, starting at index ``ifrom``.

    An HTML entity (``&xyz;``) counts as one character and contributes the
    single character that follows the ``&``; the rest of the entity up to
    the closing ``;`` is skipped.  Outside entities only ASCII letters are
    copied; every other character is skipped without being counted.
    """
    result = ''
    i = ifrom
    copied = 0
    in_entity = False
    while i < len(str) and copied < count:
        ch = str[i]
        if ch == '&':
            # start of an entity: keep its first character only
            in_entity = True
            if i + 1 < len(str):
                result += str[i + 1]
            copied += 1
            i += 2
            continue
        if in_entity:
            if ch == ';':
                in_entity = False
        elif ('A' <= ch <= 'Z') or ('a' <= ch <= 'z'):
            result += ch
            copied += 1
        i += 1

    return result
165
166
167	#
168	# Handle a list of authors (separated by 'and').
169	# It gives back an array of the following values:
170	# - num: the number of authors,
171	# - list: the list of the author names,
172	# - text: the bibtex text (separated by commas and/or 'and')
173	# - abbrev: abbreviation that can be used for indicate the
174	# bibliography entries
175	#
def bibtexauthor(data):
    """Parse a BibTeX author/editor field (names separated by 'and').

    Returns a dictionary with the keys
      - 'num':    the number of authors,
      - 'list':   the list of the author names (braces removed, LaTeX
                  accents replaced, "Xyz, A. B." turned into "A. B. Xyz"),
      - 'text':   the names joined as "A", "A and B" or "A, B, and C",
      - 'abbrev': abbreviation built from the last word of each name
                  (three letters for a single author, one letter each
                  otherwise).
    """
    authors = []
    for author in author_rex.split(data):
        # general transformations
        author = latexreplacements(removebraces(author.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        pos = author.find(',')
        if pos != -1:
            author = author[pos + 1:].strip() + ' ' + author[:pos].strip()
        authors.append(author)

    # Join the names directly instead of going through a temporary
    # separator character, so that names may contain any character.
    if len(authors) > 2:
        text = ', '.join(authors[:-1]) + ', and ' + authors[-1]
    else:
        text = ' and '.join(authors)

    count = 1
    if len(authors) == 1:
        count = 3
    abbrev = ''
    for author in authors:
        # the abbreviation is taken from the last word of the name
        abbrev += copychars(author, author.rfind(' ') + 1, count)

    return {
        'list': authors,
        'num': len(authors),
        'text': text,
        'abbrev': abbrev,
    }
209
210
211	#
212	# data = title string
213	# @return the capitalized title (first letter is capitalized), rest are capitalized
214	# only if capitalized inside braces
215	#
def capitalizetitle(data):
    """Return the title ``data`` with BibTeX-style capitalization.

    The title is split by ``capitalize_rex`` into braced and unbraced
    phrases.  A phrase that starts with ``{`` (after leading spaces) keeps
    its capitalization and only loses its braces.  Of the other phrases
    the very first one is capitalized (first letter upper case, the rest
    lower case, leading spaces dropped); all later ones are lower-cased.
    """
    title = ''
    for index, phrase in enumerate(capitalize_rex.split(data)):
        check = phrase.lstrip()
        if check.startswith('{'):
            # keep phrase's capitalization the same
            title += removebraces(phrase)
        elif index == 0:
            # first word --> capitalize first letter (after spaces)
            title += check.capitalize()
        else:
            title += phrase.lower()

    return title
235
236
237	#
238	# @return the bibtex for the title
239	# @param data --> title string
240	# braces are removed from title
241	#
def bibtextitle(data, entrytype):
    """Return the output text for the title ``data``, with braces removed.

    Titles of 'book' and 'inbook' entries keep their capitalization; all
    other titles are passed through ``capitalizetitle`` first.
    """
    title = data.strip()
    if entrytype not in ('book', 'inbook'):
        title = capitalizetitle(title)
    return removebraces(title)
249
250
251	#
252	# function to compare entry lists
253	#
def entry_cmp(x, y):
    """Compare two entry lists by their first element (the sort key).

    Returns -1, 0 or 1 like the classic ``cmp`` builtin, which is not
    available on Python 3; the result is computed from plain comparisons
    so the function works on every Python version.
    """
    return (x[0] > y[0]) - (x[0] < y[0])
256
257
258	#
259	# print the XML for the transformed "filecont_source"
260	#
def _apply_entry_type_formatting(entrytype, entrycont):
    """Apply the entry-type specific markup and defaults to ``entrycont``."""
    if entrytype in ('book', 'inbook'):
        if 'title' in entrycont:
            entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
        # a book without authors is listed under its editors
        if 'author' not in entrycont and 'editor' in entrycont:
            entrycont['author'] = entrycont['editor']
            entrycont['author']['text'] += ', editors'
    elif entrytype == 'article':
        if 'journal' in entrycont:
            entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        if 'booktitle' in entrycont:
            entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'


def _entry_text_lines(entrycont):
    """Return the formatted reference text of one entry as a list of lines."""
    def value(name):
        # missing and empty fields are treated alike
        return entrycont.get(name, '')

    pages = value('pages').replace('--', '-')
    lines = []

    if value('author'):
        lines.append(entrycont['author']['text'] + '.')
    if value('title'):
        lines.append(value('title') + '.')
    if value('journal'):
        lines.append(value('journal') + ',')
    if value('booktitle'):
        lines.append('In ' + value('booktitle') + ',')
    if value('type'):
        text = value('type')
        if value('number'):
            text += ' ' + value('number')
        lines.append(text + ',')
    for name in ('institution', 'publisher', 'school', 'address'):
        if value(name):
            lines.append(value(name) + ',')
    if value('edition'):
        lines.append(value('edition') + ' edition,')
    if value('howpublished'):
        lines.append(value('howpublished') + ',')
    if value('volume'):
        # volume(number):pages
        text = value('volume')
        if value('number'):
            text += '(' + value('number') + ')'
        if pages:
            text += ':' + pages
        lines.append(text + ',')
    elif pages:
        lines.append('pages ' + pages + ',')
    if value('year'):
        if value('month'):
            lines.append(value('month') + ' ' + value('year') + '.')
        else:
            lines.append(value('year') + '.')
    if value('note'):
        lines.append(value('note') + '.')

    return lines


def _entry_keys(entrycont):
    """Return ``(sortkey, bibkey)`` for one entry.

    The sort key is built from the authors' last words, the year and the
    title; the bib key from the author abbreviation ('x' without authors)
    and the last two characters of the year.  An explicit 'key' field
    prefixes the sort key and replaces the bib key.
    """
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ') + 1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']
    return sortkey, bibkey


def bibtexdecoder(filecont_source):
    """Convert washed BibTeX lines into the lines of a Doxygen page body.

    ``filecont_source`` is a sequence of lines as produced by
    ``bibtexwasher``: an "@type{id," line opens an entry, a line starting
    with "}" closes it, and the lines in between hold one
    "field = value" pair each.

    Returns a list of output lines.  For every entry (sorted by its sort
    key) it contains a "\\section <id> [<key>]" line, a <div> with the
    formatted reference, and an empty line.  Entries sharing the same key
    get the suffixes a, b, c, ...
    """
    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*\{\s*(.*),')
    endtype_rex = re.compile(r'\}\s*$')

    # group 1 of fieldname_rex is the field name; group 2 of the *data*
    # patterns is the value in {braces}, in "quotes" or bare
    fieldname_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*\{(.*)\},?')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')
    plaindata_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

    entries = []
    entrycont = None    # fields of the entry being read; None outside entries
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if pubtype_rex.match(line):
            entrycont = {}
            entrytype = pubtype_rex.sub(r'\g<1>', line).lower()
            entryid = pubtype_rex.sub(r'\g<2>', line)

        # end entry if just a }
        elif endtype_rex.match(line):
            if entrycont is None:
                # stray closing brace without an open entry
                continue
            _apply_entry_type_formatting(entrytype, entrycont)
            sortkey, bibkey = _entry_keys(entrycont)
            entries.append([sortkey, bibkey, entryid]
                           + _entry_text_lines(entrycont))
            entrycont = None

        # field, publication info
        elif entrycont is not None:
            field = ''
            data = ''
            for value_rex in (bracedata_rex, quotedata_rex, plaindata_rex):
                if value_rex.match(line):
                    field = fieldname_rex.sub(r'\g<1>', line).lower()
                    data = value_rex.sub(r'\g<2>', line)
                    break

            text = ''
            if field in ('author', 'editor'):
                entrycont[field] = bibtexauthor(data)
            elif field == 'title':
                text = bibtextitle(data, entrytype)
            elif field != '':
                text = removebraces(transformurls(data.strip()))

            if text != '':
                entrycont[field] = latexreplacements(text)

    # sort entries by their sort key (list.sort is stable)
    entries.sort(key=lambda entry: entry[0])

    # count the bibtex keys to find the ones that need a suffix
    keytable = {}
    for entry in entries:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = {}

    # generate output
    output = []
    for entry in entries:
        # generate output key from the bibtex key
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            used = counttable.get(bibkey, 0)
            outkey = bibkey + chr(97 + used)    # 97 == ord('a')
            counttable[bibkey] = used + 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
463
464
465	#
466	# return 1 iff abbr is in line but not inside braces or quotes
467	# assumes that abbr appears only once on the line (out of braces and quotes)
468	#
def verify_out_of_braces(line, abbr):
    """Return 1 iff ``abbr`` occurs in ``line`` outside braces and quotes.

    The line is split on ``{``, ``}`` and ``"``; the brace depth and the
    quote state are tracked while the pieces are scanned for ``abbr`` as
    a whole word (case-insensitively).  Returns 0 otherwise.
    """
    abbr_rex = re.compile('\\b' + abbr + '\\b', re.I)

    open_brace = 0
    open_quote = 0

    for phrase in delimiter_rex.split(line):
        if phrase == '{':
            open_brace += 1
        elif phrase == '}':
            open_brace -= 1
        elif phrase == '"':
            # a quote toggles the "inside quotes" state
            open_quote = 1 - open_quote
        elif abbr_rex.search(phrase):
            if open_brace == 0 and open_quote == 0:
                return 1

    return 0
493
494
495	#
496	# a line in the form phrase1 # phrase2 # ... # phrasen
497	# is returned as phrase1 phrase2 ... phrasen
498	# with the correct punctuation
499	# Bug: Doesn't always work with multiple abbreviations plugged in
500	#
def concat_line(line):
    """Join the '#'-concatenated parts of a "field = a # b # ..." line.

    The value is split by ``concatsplit_rex``; the delimiters between the
    parts are dropped, and the outer delimiters of the whole value are
    turned into braces, so the result reads "field = {a b ...}".

    Bug: Doesn't always work with multiple abbreviations plugged in.
    """
    # only look at part after equals
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    pound_split = concatsplit_rex.split(rest)
    last = len(pound_split) - 1

    joined = field + ' ='
    for index, phrase in enumerate(pound_split):
        phrase = phrase.strip()

        # opening delimiter: dropped on inner parts, '{' on the first part
        if index != 0:
            if phrase.startswith('"') or phrase.startswith('{'):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            phrase = phrase.replace('"', '{', 1)

        # closing delimiter: dropped on inner parts, '}' on the last part
        if index != last:
            if phrase.endswith('"') or phrase.endswith('}'):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            phrase = phrase[:-1] + '}'
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + '},'

        # if phrase did have \#, add the \# back
        if phrase.endswith('\\'):
            phrase = phrase + '#'
        joined = joined + ' ' + phrase

    return joined
540
541
542	#
543	# substitute abbreviations into filecont
544	# @param filecont_source - string of data from file
545	#
def bibtex_replace_abbreviations(filecont_source):
    """Substitute @string abbreviations into the washed BibTeX text.

    @param filecont_source - string of data from file

    The @string, @comment and @preamble blocks are removed from the
    text.  Abbreviations (the BibTeX month names plus the ones defined by
    @string) that occur outside braces and quotes are replaced by their
    values, and '#' concatenations are resolved with ``concat_line``.
    Returns the resulting text as one string.
    """
    filecont = filecont_source.splitlines()

    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    front = '\\b'
    back = '(,?)\\b'
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]

    # group 1 is the abbreviation name, group 2 its (unstripped) value
    abbrdef_rex = re.compile(
        r'\s*@string\s*\{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)

    comment_rex = re.compile(r'@comment\s*\{', re.I)
    preamble_rex = re.compile(r'@preamble\s*\{', re.I)

    waiting_for_end_string = 0
    kept_lines = []

    for line in filecont:
        if line == ' ' or line == '':
            continue

        # Inside a @string/@comment/@preamble block: drop everything up to
        # and including the first line that contains a closing brace.
        # NOTE(review): the extracted source lost its indentation, so the
        # exact scope of the original 'continue' here should be confirmed.
        if waiting_for_end_string:
            if '}' in line:
                waiting_for_end_string = 0
            continue

        abbrdef = abbrdef_rex.search(line)
        if abbrdef:
            abbr = abbrdef.group(1)
            # the first definition of an abbreviation wins
            if abbr not in abbr_list:
                abbr_list.append(abbr)
                value_list.append(abbrdef.group(2).strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        for abbr, value, rex in zip(abbr_list, value_list, abbr_rex):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # A function replacement inserts the value literally;
                    # a template string would interpret its backslashes.
                    line = rex.sub(
                        lambda match, value=value: value + match.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept_lines.append(line + '\n')

    filecont2 = ''.join(kept_lines)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*\}')
    afterbracevalue_rex = re.compile(r'(=\s*\{[^=]*)\},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
641
642	#
643	# convert @type( ... ) to @type{ ... }
644	#
def no_outer_parens(filecont):
    """Convert every "@type( ... )" in ``filecont`` to "@type{ ... }".

    The text is split on parentheses and braces.  A '(' that directly
    follows a piece containing an "@" marker becomes '{', and the ')' that
    brings the parenthesis count back to zero becomes '}'.  All other
    parentheses are kept.  Returns the rebuilt text.
    """
    at_rex = re.compile(r'@\w*')

    open_paren_count = 0
    open_type = 0       # 1 while inside an "@type(" block
    look_next = 0       # 1 when the previous piece contained an "@" marker

    pieces = []
    for phrase in re.split(r'([(){}])', filecont):
        if look_next == 1:
            if phrase == '(':
                phrase = '{'
                open_paren_count += 1
            else:
                # the entry is not delimited by parentheses
                open_type = 0
            look_next = 0

        if phrase == '(':
            open_paren_count += 1
        elif phrase == ')':
            open_paren_count -= 1
            if open_type == 1 and open_paren_count == 0:
                phrase = '}'
                open_type = 0
        elif at_rex.search(phrase):
            open_type = 1
            look_next = 1

        pieces.append(phrase)

    return ''.join(pieces)
685
686
687	#
688	# make all whitespace into just one space
689	# format the bibtex file into a usable form.
690	#
def bibtexwasher(filecont_source):
    """Normalize the lines of a BibTeX file into a usable form.

    ``filecont_source`` is the list of raw lines of the file.  Whitespace
    runs are collapsed to one space, '%' comment lines are dropped,
    "@type( ... )" is converted to "@type{ ... }", the text is re-split so
    that each entry header, each field and each closing brace is on a
    line of its own, and the @string abbreviations are substituted.

    Returns the list of non-blank lines, each terminated by a newline.
    """
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    washed = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if not comment_rex.match(line) and line != '':
            washed.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(washed)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*\{[^=]*)\},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*(\{(\s*)[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*\}', '"\n}\n', filecont)
    filecont = re.sub(r'\}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'\{\\&\}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # the brace that closes an entry is put on a line of its own
    open_brace_count = 0
    pieces = []
    for phrase in re.split(r'([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                pieces.append('\n')
        pieces.append(phrase)
    filecont = ''.join(pieces)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the non-blank lines
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
773
774
def filehandler(filepath):
    """Convert the BibTeX file ``filepath`` and print the Doxygen page.

    The output goes to standard output: a "/**" comment containing the
    "\\page references References" header and the converted entries.
    If the file cannot be read, a message is printed and nothing else is
    done.
    """
    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even if reading fails
            fd.close()
    except IOError:
        print('Could not open file: ' + filepath)
        # there is nothing to convert without the file contents
        return
    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)
    print('/**')
    print('\\page references References')
    print('')
    for line in outdata:
        print(line)
    print('*/')
790
791
792	# main program
793
def main():
    """Run the converter on the file named by the first command line argument.

    Without an argument a message is printed and the program exits with a
    non-zero status, so that calling scripts can detect the failure.
    """
    import sys
    if sys.argv[1:]:
        filepath = sys.argv[1]
    else:
        print("No input file")
        sys.exit(1)
    filehandler(filepath)


if __name__ == "__main__":
    main()
804
805
806	# end python script

Note: See TracBrowser for help on using the repository browser.

Download in other formats: