COIN-OR::LEMON - Graph Library

Context Navigation

source: lemon/scripts/bib2dox.py @ 790:94ef0a5c0005

Last change on this file since 790:94ef0a5c0005 was 790:94ef0a5c0005, checked in by Peter Kovacs <kpeter@…>, 15 years ago
Add bib->dox converter and initial references.bib (#184)
File size: 25.4 KB

Line
1	#!/usr/bin/env /usr/local/Python/bin/python2.1
2	"""
3	BibTeX to Doxygen converter
4	Usage: python bib2dox.py bibfile.bib > bibfile.dox
5
6	This code is a modification of the BibTeX to XML converter
7	by Vidar Bronken Gundersen et al. See the original copyright notices below.
8
9	**********************************************************************
10
11	Decoder for bibliographic data, BibTeX
12	Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
13
14	v.8
15	(c)2002-06-23 Vidar Bronken Gundersen
16	http://bibtexml.sf.net/
17	Reuse approved as long as this notification is kept.
18	Licence: GPL.
19
20	Contributions/thanks to:
21	Egon Willighagen, http://sf.net/projects/jreferences/
22	Richard Mahoney (for providing a test case)
23
24	Edited by Sara Sprenkle to be more robust and handle more bibtex features.
25	(c) 2003-01-15
26
27	1. Changed bibtex: tags to bibxml: tags.
28	2. Use xmlns:bibxml="http://bibtexml.sf.net/"
29	3. Allow spaces between @type and first {
30	4. "author" fields with multiple authors split by " and "
31	are put in separate xml "bibxml:author" tags.
32	5. Option for Titles: words are capitalized
33	only if first letter in title or capitalized inside braces
34	6. Removes braces from within field values
35	7. Ignores comments in bibtex file (including @comment{ or % )
36	8. Replaces some special latex tags, e.g., replaces ~ with ' '
37	9. Handles bibtex @string abbreviations
38	--> includes bibtex's default abbreviations for months
39	--> does concatenation of abbr # " more " and " more " # abbr
40	10. Handles @type( ... ) or @type{ ... }
41	11. The keywords field is split on , or ; and put into separate xml
42	"bibxml:keywords" tags
43	12. Ignores @preamble
44
45	Known Limitations
46	1. Does not transform Latex encoding like math mode and special
47	latex symbols.
48	2. Does not parse author fields into first and last names.
49	E.g., It does not do anything special to an author whose name is
50	in the form LAST_NAME, FIRST_NAME
51	In "author" tag, will show up as
52	<bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
53	3. Does not handle "crossref" fields other than to print
54	<bibxml:crossref>...</bibxml:crossref>
55	4. Does not inform user of the input's format errors. You just won't
56	be able to transform the file later with XSL
57
58	You will have to manually edit the XML output if you need to handle
59	these (and unknown) limitations.
60
61	"""
62
63	import string, re
64
# set of valid name characters
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
# (raw strings, so every backslash reaches the regex engine unchanged)
#
author_rex = re.compile(r'\s+and\s+')
rembraces_rex = re.compile(r'[{}]')
capitalize_rex = re.compile(r'(\{\w*\})')

# splits a keyword list on ',' or ';' (not referenced by the code below)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line); the whitespace is optional so that an escaped
# "\#" is split as well -- concat_line() then puts the '#' back
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "name = value" lines: group 1 is the whole field name, group 2 the value
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# LaTeX \url{address} command; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')
88
89
#
# return the string parameter with every LaTeX \url{address} command
# replaced by an HTML link whose target and text are both the address
#
def transformurls(str):
    linked = url_rex.sub(r'<a href="\1">\1</a>', str)
    return linked
95
#
# return the string parameter with every '{' and '}' character deleted
#
def removebraces(str):
    stripped = rembraces_rex.sub('', str)
    return stripped
101
#
# LaTeX accent command -> HTML entity pairs used by latexreplacements().
# Entities (not raw accented characters) are produced because copychars()
# decodes the "&xyz;" form when it builds keys and abbreviations.
#
_LATEX_TO_HTML = (
    ('\\\'a', '&aacute;'),
    ('\\"a', '&auml;'),
    ('\\\'e', '&eacute;'),
    ('\\"e', '&euml;'),
    ('\\\'i', '&iacute;'),
    ('\\"i', '&iuml;'),
    ('\\\'o', '&oacute;'),
    ('\\"o', '&ouml;'),
    ('\\\'u', '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),    # &utilde; does not exist
    ('\\\'A', '&Aacute;'),
    ('\\"A', '&Auml;'),
    ('\\\'E', '&Eacute;'),
    ('\\"E', '&Euml;'),
    ('\\\'I', '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ('\\\'O', '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ('\\\'U', '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),    # &Utilde; does not exist
)


#
# latex-specific replacements
# (do this after braces were removed)
# @param line --> text of one field
# @return the text with '~' turned into a space and the accent commands
#         listed in _LATEX_TO_HTML turned into HTML entities
#
def latexreplacements(line):
    line = line.replace('~', ' ')
    for latex, entity in _LATEX_TO_HTML:
        line = line.replace(latex, entity)
    return line
134
#
# copy characters from a string decoding html expressions (&xyz;)
# @param str --> source text
# @param ifrom --> index of the first character to look at
# @param count --> maximum number of characters to copy
# @return the copied characters: plain ASCII letters are copied as they
#         are, an html expression contributes only the character right
#         after its '&', and everything else is skipped
#
def copychars(str, ifrom, count):
    result = ''
    copied = 0
    in_entity = False
    size = len(str)
    i = ifrom
    while i < size and copied < count:
        ch = str[i]
        if ch == '&':
            # start of an html expression: keep its first letter only
            in_entity = True
            if i + 1 < size:
                result += str[i + 1]
            copied += 1
            i += 2
            continue
        if in_entity:
            # drop the rest of the expression up to the closing ';'
            if ch == ';':
                in_entity = False
        elif ('A' <= ch <= 'Z') or ('a' <= ch <= 'z'):
            result += ch
            copied += 1
        i += 1

    return result
161
162
#
# Handle a list of authors (separated by 'and').
# It gives back a dictionary of the following values:
# - num: the number of authors,
# - list: the list of the author names,
# - text: the bibtex text (separated by commas and/or 'and')
# - abbrev: abbreviation that can be used to indicate the
#           bibliography entries
#
def bibtexauthor(data):
    names = []
    for raw in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        comma = name.find(',')
        if comma != -1:
            name = name[comma + 1:].strip() + ' ' + name[:comma].strip()
        names.append(name)

    # "A", "A and B" or "A, B, and C"; the names are joined directly, so
    # no placeholder character can clash with the contents of a name
    num = len(names)
    if num == 1:
        text = names[0]
    elif num == 2:
        text = names[0] + ' and ' + names[1]
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    # a single author contributes three letters, otherwise one letter each,
    # taken from the text after the last space of the name
    if num == 1:
        count = 3
    else:
        count = 1
    abbrev = ''
    for name in names:
        abbrev += copychars(name, name.rfind(' ') + 1, count)

    return {'num': num, 'list': names, 'text': text, 'abbrev': abbrev}
205
206
#
# data = title string
# @return the title with its first piece capitalized (after leading
#         spaces are dropped) and the other pieces in lower case;
#         pieces that capitalize_rex isolates as braced keep their
#         capitalization and only lose the braces
#
def capitalizetitle(data):
    title = ''
    for count, phrase in enumerate(capitalize_rex.split(data)):
        check = phrase.lstrip()

        if check.startswith('{'):
            # keep phrase's capitalization the same
            title += removebraces(phrase)
        elif count == 0:
            # first piece --> capitalize first letter (after spaces)
            title += check.capitalize()
        else:
            title += phrase.lower()

    return title
231
232
#
# @return the bibtex for the title
# @param data --> title string
# @param entrytype --> lower-case entry type; 'book' and 'inbook' titles
#                      keep their capitalization, all other titles go
#                      through capitalizetitle()
# braces are removed from title
#
def bibtextitle(data, entrytype):
    title = data.strip()
    if entrytype not in ('book', 'inbook'):
        title = capitalizetitle(title)
    return removebraces(title)
245
246
#
# function to compare entry lists
# @return -1, 0 or 1 according to the order of the first items (the sort
#         keys) of x and y; written without the cmp() builtin, which
#         Python 3 no longer provides
#
def entry_cmp(x, y):
    return (x[0] > y[0]) - (x[0] < y[0])
252
253
#
# build the output lines of one parsed entry
# @param entrytype --> lower-case publication type ('book', 'article', ...)
# @param entrycont --> dictionary of the fields of the entry; 'author' and
#                      'editor' hold bibtexauthor() results, every other
#                      field holds text (the dictionary is modified)
# @return a list: the sort key, the bibtex key, then the text lines
#
def _bibentry_lines(entrytype, entrycont):
    def has(name):
        return entrycont.get(name, '') != ''

    # entry type related formattings; a missing field is left alone
    if entrytype in ('book', 'inbook'):
        if has('title'):
            entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
        if 'author' not in entrycont and 'editor' in entrycont:
            # work on a copy so that the 'editor' field stays unchanged
            editors = dict(entrycont['editor'])
            if editors['num'] == 1:
                editors['text'] += ', editor'
            else:
                editors['text'] += ', editors'
            entrycont['author'] = editors
    elif entrytype == 'article':
        if has('journal'):
            entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        if has('booktitle'):
            entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if has('pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    lines = []
    if has('author'):
        lines.append(entrycont['author']['text'] + '.')
    if has('title'):
        lines.append(entrycont['title'] + '.')
    if has('journal'):
        lines.append(entrycont['journal'] + ',')
    if has('booktitle'):
        lines.append('In ' + entrycont['booktitle'] + ',')
    if has('type'):
        eline = entrycont['type']
        if has('number'):
            eline += ' ' + entrycont['number']
        lines.append(eline + ',')
    for name in ('institution', 'publisher', 'school', 'address'):
        if has(name):
            lines.append(entrycont[name] + ',')
    if has('edition'):
        lines.append(entrycont['edition'] + ' edition,')
    if has('howpublished'):
        lines.append(entrycont['howpublished'] + ',')
    if has('volume'):
        # volume(number):pages
        eline = entrycont['volume']
        if has('number'):
            eline += '(' + entrycont['number'] + ')'
        if has('pages'):
            eline += ':' + entrycont['pages']
        lines.append(eline + ',')
    elif has('pages'):
        lines.append('pages ' + entrycont['pages'] + ',')
    if has('year'):
        if has('month'):
            lines.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            lines.append(entrycont['year'] + '.')
    if has('note'):
        lines.append(entrycont['note'] + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ') + 1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        # an explicit "key" field leads the sort key and replaces the
        # generated bibtex key
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey] + lines


#
# generate the doxygen table rows for the transformed "filecont_source"
# @param filecont_source --> washed bibtex lines (see bibtexwasher)
# @return list of output lines, entries ordered by their sort key
#
def bibtexdecoder(filecont_source):
    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*\{\s*(.*),')
    # a line that starts with } ends the entry
    endtype_rex = re.compile(r'\}\s*$')

    # field name / field data of "name = {data}", "name = "data"" and
    # "name = data" lines
    namefield_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*\{(.*)\},?')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')
    plaindata_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

    filecont = []
    entrytype = ''
    entrycont = None    # fields of the entry being read; None outside one

    for line in filecont_source:
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        start = pubtype_rex.match(line)
        if start:
            entrycont = {}
            entrytype = start.group(1).lower()

        # end entry if just a }
        elif endtype_rex.match(line):
            if entrycont is not None:
                # generate doxygen code for the entry
                filecont.append(_bibentry_lines(entrytype, entrycont))
                entrycont = None

        elif entrycont is not None:
            # field, publication info
            field = ''
            data = ''

            # field = {data} entries
            if bracedata_rex.match(line):
                field = namefield_rex.sub(r'\g<1>', line).lower()
                data = bracedata_rex.sub(r'\g<2>', line)

            # field = "data" entries
            elif quotedata_rex.match(line):
                field = namefield_rex.sub(r'\g<1>', line).lower()
                data = quotedata_rex.sub(r'\g<2>', line)

            # field = data entries
            elif plaindata_rex.match(line):
                field = namefield_rex.sub(r'\g<1>', line).lower()
                data = plaindata_rex.sub(r'\g<2>', line)

            if field in ('author', 'editor'):
                entrycont[field] = bibtexauthor(data)
            elif field == 'title':
                text = bibtextitle(data, entrytype)
                if text != '':
                    entrycont[field] = latexreplacements(text)
            elif field != '':
                text = removebraces(transformurls(data.strip()))
                if text != '':
                    entrycont[field] = latexreplacements(text)

    # sort entries by their sort key (the first item)
    filecont.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in filecont:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = {}

    # generate output
    output = []
    for entry in filecont:
        # generate output key from the bibtex key: a key shared by
        # several entries gets the suffix a, b, c, ...
        bibkey = entry[1]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            used = counttable.get(bibkey, 0)
            outkey = bibkey + chr(97 + used)
            counttable[bibkey] = used + 1

        # append the entry code to the output
        output.append('<tr valign="top">\n' +
                      '<td>[' + outkey + ']</td>')
        output.append('<td>')
        output.append('\\anchor ' + outkey)
        output.extend(entry[2:])
        output.append('</td>\n</tr>')
        output.append('')

    return output
459
460
#
# return 1 iff abbr is in line but not inside braces or quotes
# assumes that abbr appears only once on the line (out of braces and quotes)
# (abbr is matched as a whole word, ignoring case)
#
def verify_out_of_braces(line, abbr):
    # escape abbr so that it is always searched for as literal text
    abbr_rex = re.compile(r'\b' + re.escape(abbr) + r'\b', re.I)

    open_brace = 0
    open_quote = 0

    for phrase in delimiter_rex.split(line):
        if phrase == '{':
            open_brace += 1
        elif phrase == '}':
            open_brace -= 1
        elif phrase == '"':
            # every quote character toggles the "inside quotes" state
            open_quote = 1 - open_quote
        elif abbr_rex.search(phrase):
            if open_brace == 0 and open_quote == 0:
                return 1

    return 0
489
490
#
# a line in the form phrase1 # phrase2 # ... # phrasen
# is returned as phrase1 phrase2 ... phrasen
# with the correct punctuation
# Bug: Doesn't always work with multiple abbreviations plugged in
#
def concat_line(line):
    # only look at part after equals
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    pieces = concatsplit_rex.split(rest)
    last = len(pieces) - 1

    joined = field + ' ='

    for index, phrase in enumerate(pieces):
        phrase = phrase.strip()

        # opening delimiter: dropped on every phrase but the first one,
        # where an opening quote becomes an opening brace
        if index != 0:
            if phrase.startswith('"') or phrase.startswith('{'):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            phrase = phrase.replace('"', '{', 1)

        # closing delimiter: dropped on every phrase but the last one,
        # where a closing quote becomes a closing brace
        if index != last:
            if phrase.endswith('"') or phrase.endswith('}'):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            phrase = phrase[:-1] + '}'
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + '},'

        # if phrase did have \#, add the \# back
        if phrase.endswith('\\'):
            phrase = phrase + '#'
        joined = joined + ' ' + phrase

    return joined
536
537
#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
# @return the data as one string in which the @string, @comment and
#         @preamble lines are dropped and the abbreviations found outside
#         braces and quotes are replaced by their values
#
def bibtex_replace_abbreviations(filecont_source):
    # These are defined in bibtex, so we'll define them too
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    def compile_abbr(abbr):
        # whole word, optionally followed by a comma that is kept (group 1)
        return re.compile(r'\b' + re.escape(abbr) + r'(,?)\b', re.I)

    abbr_rex = [compile_abbr(abbr) for abbr in abbr_list]

    # @string{ name = value ; group 1 is the name, group 2 the value
    abbrdef_rex = re.compile(
        r'\s*@string\s*\{\s*(' + valid_name_chars + r'*)\s*=(.*)', re.I)

    comment_rex = re.compile(r'@comment\s*\{', re.I)
    preamble_rex = re.compile(r'@preamble\s*\{', re.I)

    waiting_for_end_string = 0
    kept = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        # inside @string/@comment/@preamble: drop every line up to and
        # including the first one that contains a closing brace
        if waiting_for_end_string:
            if '}' in line:
                waiting_for_end_string = 0
            continue

        definition = abbrdef_rex.search(line)
        if definition:
            abbr = definition.group(1)
            # the first definition of a name wins
            if abbr not in abbr_list:
                abbr_list.append(abbr)
                value_list.append(definition.group(2).strip())
                abbr_rex.append(compile_abbr(abbr))
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace subsequent abbreviations with the value
        for abbr, rex, value in zip(abbr_list, abbr_rex, value_list):
            if rex.search(line):
                if verify_out_of_braces(line, abbr) == 1:
                    # a callable keeps backslashes of the value literal
                    # (a string would be read as a replacement template)
                    line = rex.sub(
                        lambda found, value=value: value + found.group(1),
                        line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept.append(line + '\n')

    filecont2 = ''.join(kept)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*\}')
    afterbracevalue_rex = re.compile(r'(=\s*\{[^=]*)\},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub(r'\g<1>},\n', filecont2)

    return filecont2
637
#
# convert @type( ... ) to @type{ ... }
# @param filecont --> the bibtex data as one string
# @return the data where the parenthesis that directly follows a piece of
#         text containing '@', and the parenthesis that closes it, are
#         replaced by braces; other parentheses are kept
#
def no_outer_parens(filecont):
    at_rex = re.compile(r'@\w*')

    open_paren_count = 0
    open_type = 0       # 1 while inside an @type( ... ) being converted
    look_next = 0       # 1 right after a piece of text that contains '@'

    # rebuild filecont
    rebuilt = []

    # the delimiters are returned as separate items of the split
    for phrase in re.split(r'([(){}])', filecont):
        if look_next == 1:
            if phrase == '(':
                phrase = '{'
                open_paren_count += 1
            else:
                open_type = 0
            look_next = 0

        if phrase == '(':
            open_paren_count += 1

        elif phrase == ')':
            open_paren_count -= 1
            # back at depth 0: this parenthesis closes the @type( ... )
            if open_type == 1 and open_paren_count == 0:
                phrase = '}'
                open_type = 0

        elif at_rex.search(phrase):
            open_type = 1
            look_next = 1

        rebuilt.append(phrase)

    return ''.join(rebuilt)
681
682
#
# make all whitespace into just one space
# format the bibtex file into a usable form.
# @param filecont_source --> list of the lines of the bibtex file
# @return list of lines (each one ends with a newline) that start a new
#         line at every entry head, field and closing brace
#
def bibtexwasher(filecont_source):
    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    washed = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            washed.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(washed)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*\{[^=]*)\},', r'\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', r'= \g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*(\{(\s*)[^,\s]*)\s*,',
                      r'\n\n\g<1>\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*\}', '"\n}\n', filecont)
    filecont = re.sub(r'\}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', r'\n@\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub(r'\{\\&\}', '&', filecont)
    filecont = re.sub(r'\\&', '&', filecont)

    # do checking for open braces to get format correct:
    # the brace that closes an entry starts a new line
    open_brace_count = 0
    rebuilt = []
    for phrase in re.split(r'([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                rebuilt.append('\n')
        rebuilt.append(phrase)

    filecont2 = bibtex_replace_abbreviations(''.join(rebuilt))

    # gather the lines, ignoring the blank ones
    return [line + '\n'
            for line in filecont2.splitlines()
            if line != '' and line != ' ']
769
770
#
# convert the bibtex file "filepath" and write the doxygen page
# (a "\page references" comment block with one table row per entry)
# to the standard output
# If the file cannot be read, a message is written to the standard error
# and the program exits with status 1.
#
def filehandler(filepath):
    import sys

    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            fd.close()
    except IOError:
        # not on stdout: that is normally redirected into the .dox file
        sys.stderr.write('Could not open file: ' + filepath + '\n')
        sys.exit(1)

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    page = ['/**',
            '\\page references References',
            '',
            '<table border="0" cellspacing="5px" width="100%">',
            '']
    page.extend(outdata)
    page.extend(['</table>', '', '*/'])
    sys.stdout.write('\n'.join(page) + '\n')
790
791
# main program

#
# convert the bibtex file named by the first command line argument
# Without an argument a message is written to the standard error and the
# program exits with status 1.
#
def main():
    import sys
    if not sys.argv[1:]:
        sys.stderr.write('No input file\n')
        sys.exit(1)
    filehandler(sys.argv[1])


if __name__ == "__main__":
    main()
804
805
806	# end python script

Note: See TracBrowser for help on using the repository browser.

Download in other formats: