| 1 | #! /usr/bin/env python | 
|---|
| 2 | """ | 
|---|
| 3 |   BibTeX to Doxygen converter | 
|---|
| 4 |   Usage: python bib2dox.py bibfile.bib > bibfile.dox | 
|---|
| 5 |  | 
|---|
| 6 |   This file is a part of LEMON, a generic C++ optimization library. | 
|---|
| 7 |  | 
|---|
| 8 |   ********************************************************************** | 
|---|
| 9 |  | 
|---|
| 10 |   This code is the modification of the BibTeX to XML converter | 
|---|
| 11 |   by Vidar Bronken Gundersen et al. | 
|---|
| 12 |   See the original copyright notices below.  | 
|---|
| 13 |  | 
|---|
| 14 |   ********************************************************************** | 
|---|
| 15 |  | 
|---|
| 16 |   Decoder for bibliographic data, BibTeX | 
|---|
| 17 |   Usage: python bibtex2xml.py bibfile.bib > bibfile.xml | 
|---|
| 18 |  | 
|---|
| 19 |   v.8 | 
|---|
| 20 |   (c)2002-06-23 Vidar Bronken Gundersen | 
|---|
| 21 |   http://bibtexml.sf.net/ | 
|---|
| 22 |   Reuse approved as long as this notification is kept. | 
|---|
| 23 |   Licence: GPL. | 
|---|
| 24 |  | 
|---|
| 25 |   Contributions/thanks to: | 
|---|
| 26 |   Egon Willighagen, http://sf.net/projects/jreferences/ | 
|---|
| 27 |   Richard Mahoney (for providing a test case) | 
|---|
| 28 |  | 
|---|
| 29 |   Edited by Sara Sprenkle to be more robust and handle more bibtex features. | 
|---|
| 30 |   (c) 2003-01-15 | 
|---|
| 31 |  | 
|---|
| 32 |   1.  Changed bibtex: tags to bibxml: tags. | 
|---|
| 33 |   2.  Use xmlns:bibxml="http://bibtexml.sf.net/" | 
|---|
| 34 |   3.  Allow spaces between @type and first { | 
|---|
| 35 |   4.  "author" fields with multiple authors split by " and " | 
|---|
| 36 |       are put in separate xml "bibxml:author" tags. | 
|---|
| 37 |   5.  Option for Titles: words are capitalized | 
|---|
| 38 |       only if first letter in title or capitalized inside braces | 
|---|
| 39 |   6.  Removes braces from within field values | 
|---|
| 40 |   7.  Ignores comments in bibtex file (including @comment{ or % ) | 
|---|
| 41 |   8.  Replaces some special latex tags, e.g., replaces ~ with ' ' | 
|---|
| 42 |   9.  Handles bibtex @string abbreviations | 
|---|
| 43 |         --> includes bibtex's default abbreviations for months | 
|---|
| 44 |         --> does concatenation of abbr # " more " and " more " # abbr | 
|---|
| 45 |   10. Handles @type( ... ) or @type{ ... } | 
|---|
| 46 |   11. The keywords field is split on , or ; and put into separate xml | 
|---|
| 47 |       "bibxml:keywords" tags | 
|---|
| 48 |   12. Ignores @preamble | 
|---|
| 49 |  | 
|---|
| 50 |   Known Limitations | 
|---|
| 51 |   1.  Does not transform Latex encoding like math mode and special | 
|---|
| 52 |       latex symbols. | 
|---|
| 53 |   2.  Does not parse author fields into first and last names. | 
|---|
| 54 |       E.g., It does not do anything special to an author whose name is | 
|---|
| 55 |       in the form LAST_NAME, FIRST_NAME | 
|---|
| 56 |       In "author" tag, will show up as | 
|---|
| 57 |       <bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author> | 
|---|
| 58 |   3.  Does not handle "crossref" fields other than to print | 
|---|
| 59 |       <bibxml:crossref>...</bibxml:crossref> | 
|---|
| 60 |   4.  Does not inform user of the input's format errors.  You just won't | 
|---|
| 61 |       be able to transform the file later with XSL | 
|---|
| 62 |  | 
|---|
| 63 |   You will have to manually edit the XML output if you need to handle | 
|---|
| 64 |   these (and unknown) limitations. | 
|---|
| 65 |  | 
|---|
| 66 | """ | 
|---|
| 67 |  | 
|---|
| 68 | import string, re | 
|---|
| 69 |  | 
|---|
# Set of characters that may appear in a @string abbreviation name.
valid_name_chars = r'[\w\-:]'

#
# Global regular expressions, compiled once at import time.
# Raw strings are used throughout: in Python 3 the old url_rex literal
# ('\\\url{...') is a SyntaxError because '\u' starts a unicode escape,
# and the other plain literals raise invalid-escape warnings.
#

# "author"/"editor" fields list several names joined by the word "and".
author_rex = re.compile(r'\s+and\s+')
# Strips '{' and '}' from field values.
rembraces_rex = re.compile(r'[{}]')
# Captures {...} groups whose capitalization must be preserved in titles.
capitalize_rex = re.compile(r'({[^}]*})')

# used by bibtexkeywords(data): keyword lists split on ',' or ';'
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line): the '#' concatenation operator (with spaces)
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "field = value" lines; data_rex stops the value at the first comma.
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# \url{...} markup generated for the "url" field.
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
|---|
| 98 |  | 
|---|
#
# replace \url{...} markup with an HTML anchor (the old comment here was a
# copy-paste of removebraces' comment)
#
def transformurls(text):
    """Return *text* with every \\url{...} occurrence turned into a link."""
    # The pattern is inlined as a raw string: the original module-level
    # url_rex literal used '\\\url{', whose '\u' escape is rejected by
    # Python 3.  The parameter no longer shadows the builtin ``str``;
    # all callers pass it positionally.
    return re.sub(r'\\url\{([^}]*)\}', r'<a href="\1">\1</a>', text)
|---|
| 104 |  | 
|---|
#
# return the string parameter without braces
#
def removebraces(text):
    """Return *text* with all '{' and '}' characters removed."""
    # str.replace is used instead of the module-level rembraces_rex so the
    # helper is self-contained; the parameter no longer shadows the builtin
    # ``str`` (all callers pass it positionally).
    return text.replace('{', '').replace('}', '')
|---|
| 110 |  | 
|---|
#
# latex-specific replacements
# (do this after braces were removed)
#
def latexreplacements(line):
    """Replace common LaTeX markup in *line* with plain/Unicode text.

    Handles '~' (non-breaking space) and the acute ("\\'"), umlaut ('\\"')
    and double-acute ('\\H ') accent commands for vowels.  Must run after
    removebraces(), since the patterns expect the bare backslash commands.
    """
    # (pattern, replacement) pairs, applied in the original order; the
    # patterns do not overlap, so the order is not semantically important.
    replacements = (
        ('~', ' '),
        ("\\'a", 'á'), ('\\"a', 'ä'),
        ("\\'e", 'é'), ('\\"e', 'ë'),
        ("\\'i", 'í'), ('\\"i', 'ï'),
        ("\\'o", 'ó'), ('\\"o', 'ö'),
        ("\\'u", 'ú'), ('\\"u', 'ü'),
        ('\\H o', 'õ'), ('\\H u', 'ü'),   # ũ does not exist
        ("\\'A", 'Á'), ('\\"A', 'Ä'),
        ("\\'E", 'É'), ('\\"E', 'Ë'),
        ("\\'I", 'Í'), ('\\"I', 'Ï'),
        ("\\'O", 'Ó'), ('\\"O', 'Ö'),
        ("\\'U", 'Ú'), ('\\"U', 'Ü'),
        ('\\H O', 'Õ'), ('\\H U', 'Ü'),   # Ũ does not exist
    )
    # string.replace() was removed in Python 3; use the str method instead.
    for old, new in replacements:
        line = line.replace(old, new)
    return line
|---|
| 143 |  | 
|---|
#
# copy characters form a string decoding html expressions (&xyz;)
#
def copychars(str, ifrom, count):
    """Collect up to *count* letters from str[ifrom:], decoding entities.

    An HTML entity '&xyz;' contributes only its first character ('x');
    outside of entities only ASCII letters are copied, everything else
    (digits, punctuation, spaces) is skipped.
    """
    picked = []
    taken = 0
    pos = ifrom
    inside_entity = False
    limit = len(str)
    while pos < limit and taken < count:
        ch = str[pos]
        if ch == '&':
            # start of an entity: keep the character right after the '&'
            inside_entity = True
            if pos + 1 < limit:
                picked.append(str[pos + 1])
            taken += 1
            pos += 2
            continue
        if inside_entity:
            # skip the entity body until the terminating ';'
            if ch == ';':
                inside_entity = False
        elif ('A' <= ch <= 'Z') or ('a' <= ch <= 'z'):
            picked.append(ch)
            taken += 1
        pos += 1

    return ''.join(picked)
|---|
| 170 |  | 
|---|
| 171 |  | 
|---|
# 
# Handle a list of authors (separated by 'and').
# It gives back a dict with the following keys:
#  - num: the number of authors,
#  - list: the list of the author names,
#  - text: the bibtex text (separated by commas and/or 'and')
#  - abbrev: abbreviation that can be used to indicate the
#    bibliography entries
#
def bibtexauthor(data):
    """Parse a BibTeX author/editor field (names joined by ' and ')."""
    def clean(raw):
        # strip braces/latex markup, then turn "Last, First" into "First Last"
        name = latexreplacements(removebraces(raw.strip()))
        comma = name.find(',')
        if comma != -1:
            name = name[comma + 1:].strip() + ' ' + name[:comma].strip()
        return name

    names = [clean(raw) for raw in author_rex.split(data)]
    num = len(names)

    # Join with a '#' placeholder first; the last separator becomes
    # '(,) and ', the remaining ones become ', '.
    text = '#'.join(names)
    if num > 1:
        last_sep = text.rfind('#')
        joiner = ' and ' if num == 2 else ', and '
        text = text[:last_sep] + joiner + text[last_sep + 1:]
    text = text.replace('#', ', ')

    # Abbreviation: the first letter of each last name; a single author
    # contributes up to three letters.
    per_author = 3 if num == 1 else 1
    abbrev = ''
    for name in names:
        abbrev += copychars(name, name.rfind(' ') + 1, per_author)

    return {'num': num, 'list': names, 'text': text, 'abbrev': abbrev}
|---|
| 214 |  | 
|---|
| 215 |  | 
|---|
#
# data = title string
# @return the capitalized title: the first phrase is capitalized and the
# rest lowercased, except {...} groups, whose capitalization is preserved
#
def capitalizetitle(data):
    """Capitalize a BibTeX title, honoring {...} protected groups."""
    # Split so that each {...} group becomes its own list element.
    # (Pattern and brace removal are inlined so this is self-contained;
    # string.lstrip()/the string module functions were removed in Python 3.)
    title_list = re.split(r'({[^}]*})', data)
    title = ''
    for idx, phrase in enumerate(title_list):
        check = phrase.lstrip()
        if check.startswith('{'):
            # protected group: keep its capitalization, drop the braces
            title += phrase.replace('{', '').replace('}', '')
        elif idx == 0:
            # first phrase: capitalize first letter, lowercase the rest
            title += check.capitalize()
        else:
            title += phrase.lower()
    return title
|---|
| 240 |  | 
|---|
| 241 |  | 
|---|
#
# @return the bibtex for the title
# @param data --> title string
# braces are removed from title
#
def bibtextitle(data, entrytype):
    """Format the title field; book titles keep their capitalization."""
    stripped = data.strip()
    if entrytype in ('book', 'inbook'):
        # book titles are shown verbatim (minus braces)
        return removebraces(stripped)
    # everything else gets the capitalize-first-word treatment
    return removebraces(capitalizetitle(stripped))
|---|
| 254 |  | 
|---|
| 255 |  | 
|---|
#
# three-way comparison of entry lists by their sort key (entry[0])
#
def entry_cmp(x, y):
    """Return -1/0/1 comparing x[0] with y[0].

    The builtin cmp() was removed in Python 3; the subtraction of the two
    boolean comparisons is the standard equivalent.
    """
    return (x[0] > y[0]) - (x[0] < y[0])
|---|
| 261 |  | 
|---|
| 262 |  | 
|---|
#
# generate the Doxygen code for the transformed "filecont_source"
#
def bibtexdecoder(filecont_source):
    """Turn preprocessed BibTeX lines into Doxygen output lines.

    *filecont_source* is an iterable of newline-terminated lines that have
    already been through abbreviation substitution.  Returns a list of
    output lines: one ``\\section`` block per entry, sorted by the
    author/year/title sort key.

    Python 3 fixes applied: dict.has_key -> ``in``, string module
    functions -> str methods, list.sort(cmp) -> key-based sort.
    """
    filecont = []
    output = []            # renamed from 'file', which shadowed a builtin

    # want @<alphanumeric chars><spaces>{<spaces><any chars>,
    pubtype_rex = re.compile(r'@(\w*)\s*{\s*(.*),')
    endtype_rex = re.compile(r'}\s*$')

    bracefield_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
    bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*{(.*)},?')

    quotefield_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
    quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')

    for line in filecont_source:
        line = line[:-1]                   # drop the trailing newline

        # encode character entities for the HTML output
        # (these replacements had decayed into no-ops: '&' -> '&' etc.)
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if pubtype_rex.match(line):
            entrycont = {}
            entry = []
            entrytype = pubtype_rex.sub(r'\g<1>', line).lower()
            entryid = pubtype_rex.sub(r'\g<2>', line)

        # end entry if just a }
        elif endtype_rex.match(line):
            # generate doxygen code for the entry

            # entry type related formattings
            if entrytype in ('book', 'inbook'):
                entrycont['title'] = '<em>' + entrycont['title'] + '</em>'
                if 'author' not in entrycont:
                    # books without authors are listed under their editors
                    entrycont['author'] = entrycont['editor']
                    entrycont['author']['text'] += ', editors'
            elif entrytype == 'article':
                entrycont['journal'] = '<em>' + entrycont['journal'] + '</em>'
            elif entrytype in ('inproceedings', 'incollection', 'conference'):
                entrycont['booktitle'] = '<em>' + entrycont['booktitle'] + '</em>'
            elif entrytype == 'techreport':
                if 'type' not in entrycont:
                    entrycont['type'] = 'Technical report'
            elif entrytype == 'mastersthesis':
                entrycont['type'] = 'Master\'s thesis'
            elif entrytype == 'phdthesis':
                entrycont['type'] = 'PhD thesis'

            # (a no-op loop that rebound dict keys to latexreplacements()
            # results was removed here — it had no effect on entrycont)

            if 'pages' in entrycont and entrycont['pages'] != '':
                entrycont['pages'] = entrycont['pages'].replace('--', '-')

            # assemble the entry text in citation order
            if 'author' in entrycont and entrycont['author'] != '':
                entry.append(entrycont['author']['text'] + '.')
            if 'title' in entrycont and entrycont['title'] != '':
                entry.append(entrycont['title'] + '.')
            if 'journal' in entrycont and entrycont['journal'] != '':
                entry.append(entrycont['journal'] + ',')
            if 'booktitle' in entrycont and entrycont['booktitle'] != '':
                entry.append('In ' + entrycont['booktitle'] + ',')
            if 'type' in entrycont and entrycont['type'] != '':
                eline = entrycont['type']
                if 'number' in entrycont and entrycont['number'] != '':
                    eline += ' ' + entrycont['number']
                eline += ','
                entry.append(eline)
            if 'institution' in entrycont and entrycont['institution'] != '':
                entry.append(entrycont['institution'] + ',')
            if 'publisher' in entrycont and entrycont['publisher'] != '':
                entry.append(entrycont['publisher'] + ',')
            if 'school' in entrycont and entrycont['school'] != '':
                entry.append(entrycont['school'] + ',')
            if 'address' in entrycont and entrycont['address'] != '':
                entry.append(entrycont['address'] + ',')
            if 'edition' in entrycont and entrycont['edition'] != '':
                entry.append(entrycont['edition'] + ' edition,')
            if 'howpublished' in entrycont and entrycont['howpublished'] != '':
                entry.append(entrycont['howpublished'] + ',')
            if 'volume' in entrycont and entrycont['volume'] != '':
                # volume(number):pages,
                eline = entrycont['volume']
                if 'number' in entrycont and entrycont['number'] != '':
                    eline += '(' + entrycont['number'] + ')'
                if 'pages' in entrycont and entrycont['pages'] != '':
                    eline += ':' + entrycont['pages']
                eline += ','
                entry.append(eline)
            else:
                if 'pages' in entrycont and entrycont['pages'] != '':
                    entry.append('pages ' + entrycont['pages'] + ',')
            if 'year' in entrycont and entrycont['year'] != '':
                if 'month' in entrycont and entrycont['month'] != '':
                    entry.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
                else:
                    entry.append(entrycont['year'] + '.')
            if 'note' in entrycont and entrycont['note'] != '':
                entry.append(entrycont['note'] + '.')
            if 'url' in entrycont and entrycont['url'] != '':
                entry.append(entrycont['url'] + '.')

            # generate keys for sorting and for the output
            sortkey = ''
            bibkey = ''
            if 'author' in entrycont:
                for author in entrycont['author']['list']:
                    sortkey += copychars(author, author.rfind(' ') + 1, len(author))
                bibkey = entrycont['author']['abbrev']
            else:
                bibkey = 'x'
            if 'year' in entrycont:
                sortkey += entrycont['year']
                bibkey += entrycont['year'][-2:]
            if 'title' in entrycont:
                sortkey += entrycont['title']
            if 'key' in entrycont:
                sortkey = entrycont['key'] + sortkey
                bibkey = entrycont['key']
            entry.insert(0, sortkey)
            entry.insert(1, bibkey)
            entry.insert(2, entryid)

            # add the entry to the file contents
            filecont.append(entry)

        else:
            # field, publication info
            field = ''
            data = ''

            # field = {data} entries
            if bracedata_rex.match(line):
                field = bracefield_rex.sub(r'\g<1>', line).lower()
                data = bracedata_rex.sub(r'\g<2>', line)

            # field = "data" entries
            elif quotedata_rex.match(line):
                field = quotefield_rex.sub(r'\g<1>', line).lower()
                data = quotedata_rex.sub(r'\g<2>', line)

            # field = data entries
            elif data_rex.match(line):
                field = field_rex.sub(r'\g<1>', line).lower()
                data = data_rex.sub(r'\g<2>', line)

            if field == 'url':
                data = '\\url{' + data.strip() + '}'

            if field in ('author', 'editor'):
                entrycont[field] = bibtexauthor(data)
                line = ''
            elif field == 'title':
                line = bibtextitle(data, entrytype)
            elif field != '':
                line = removebraces(transformurls(data.strip()))

            if line != '':
                line = latexreplacements(line)
                entrycont[field] = line

    # sort entries by their sort key; list.sort(cmp) is gone in Python 3,
    # and a key-based sort on entry[0] matches the old entry_cmp ordering
    filecont.sort(key=lambda entry: entry[0])

    # count how many entries share each bibtex key
    keytable = {}
    counttable = {}
    for entry in filecont:
        bibkey = entry[1]
        if bibkey not in keytable:
            keytable[bibkey] = 1
        else:
            keytable[bibkey] += 1

    for bibkey in keytable.keys():
        counttable[bibkey] = 0

    # generate output
    for entry in filecont:
        # generate the output key from the bibtex key; duplicated keys get
        # an 'a', 'b', ... suffix
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + counttable[bibkey])
        counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        for line in entry[3:]:
            output.append(line)
        output.append('</div>')
        output.append('')

    return output
|---|
| 473 |  | 
|---|
| 474 |  | 
|---|
#
# return 1 iff abbr is in line but not inside braces or quotes
# assumes that abbr appears only once on the line (out of braces and quotes)
#
def verify_out_of_braces(line, abbr):
    """Return 1 if *abbr* occurs on *line* outside {...} and "...", else 0."""
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    depth = 0          # current brace nesting level
    in_quote = False   # inside a "..." value?

    for token in delimiter_rex.split(line):
        if token == '{':
            depth += 1
        elif token == '}':
            depth -= 1
        elif token == '"':
            in_quote = not in_quote
        elif word_rex.search(token) and depth == 0 and not in_quote:
            return 1

    return 0
|---|
| 503 |  | 
|---|
| 504 |  | 
|---|
#
# a line in the form phrase1 # phrase2 # ... # phrasen
# is returned as phrase1 phrase2 ... phrasen
# with the correct punctuation
# Bug: Doesn't always work with multiple abbreviations plugged in
#
def concat_line(line):
    """Fold BibTeX '#' concatenations on *line* into one braced value."""
    # only look at the part after the equals sign
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    pieces = concatsplit_rex.split(rest)
    last = len(pieces) - 1
    out = field + ' ='

    for idx, phrase in enumerate(pieces):
        phrase = phrase.strip()

        # opening delimiter: drop it on inner pieces, turn a leading
        # '"' into '{' on the first piece
        if idx != 0:
            if phrase.startswith('"') or phrase.startswith('{'):
                phrase = phrase[1:]
        elif phrase.startswith('"'):
            phrase = phrase.replace('"', '{', 1)

        # closing delimiter: drop it on inner pieces, turn a trailing
        # '"' (or '",') into '}' (or '},') on the last piece
        if idx != last:
            if phrase.endswith('"') or phrase.endswith('}'):
                phrase = phrase[:-1]
        else:
            if phrase.endswith('"'):
                phrase = phrase[:-1] + '}'
            elif phrase.endswith('",'):
                phrase = phrase[:-2] + '},'

        # if phrase did have \#, add the # back
        if phrase.endswith('\\'):
            phrase += '#'
        out = out + ' ' + phrase

    return out
|---|
| 550 |  | 
|---|
| 551 |  | 
|---|
#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
#
def bibtex_replace_abbreviations(filecont_source):
    """Expand @string abbreviations (plus the bibtex month defaults).

    Returns the file contents as a single string with abbreviations
    replaced by their values, '#' concatenations folded, and the bodies of
    @string / @comment / @preamble blocks dropped.

    Python 3 fix: string.strip() no longer exists; str.strip() is used.
    """
    filecont = filecont_source.splitlines()

    # These are predefined in bibtex, so we predefine them too.
    abbr_list = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    value_list = ['January', 'February', 'March', 'April',
                  'May', 'June', 'July', 'August', 'September',
                  'October', 'November', 'December']

    # one compiled whole-word regex per abbreviation; the (,?) group keeps
    # a comma that directly follows the abbreviation
    front = r'\b'
    back = r'(,?)\b'
    abbr_rex = [re.compile(front + abbr + back, re.I) for abbr in abbr_list]
    total_abbr_count = len(abbr_list)

    abbrdef_rex = re.compile(r'\s*@string\s*{\s*(' + valid_name_chars + r'*)\s*=(.*)',
                             re.I)

    comment_rex = re.compile(r'@comment\s*{', re.I)
    preamble_rex = re.compile(r'@preamble\s*{', re.I)

    waiting_for_end_string = 0
    filecont2 = ''

    for line in filecont:
        if line == ' ' or line == '':
            continue

        # inside a @string/@comment/@preamble block: skip until the '}'
        if waiting_for_end_string:
            if re.search('}', line):
                waiting_for_end_string = 0
                continue

        # an @string definition: register the abbreviation for later lines
        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub(r'\g<1>', line)

            if abbr_list.count(abbr) == 0:
                val = abbrdef_rex.sub(r'\g<2>', line)
                abbr_list.append(abbr)
                value_list.append(val.strip())
                abbr_rex.append(re.compile(front + abbr + back, re.I))
                total_abbr_count += 1
            waiting_for_end_string = 1
            continue

        if comment_rex.search(line):
            waiting_for_end_string = 1
            continue

        if preamble_rex.search(line):
            waiting_for_end_string = 1
            continue

        # replace any known abbreviation used on this line with its value
        for abbr_count in range(total_abbr_count):
            if abbr_rex[abbr_count].search(line):
                if verify_out_of_braces(line, abbr_list[abbr_count]) == 1:
                    line = abbr_rex[abbr_count].sub(
                        value_list[abbr_count] + r'\g<1>', line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        filecont2 = filecont2 + line + '\n'

    # Do one final pass over the substituted text.

    # make sure we didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*}')
    afterbracevalue_rex = re.compile(r'(=\s*{[^=]*)},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub(r'\g<1>},\n', filecont2)

    return filecont2
|---|
| 651 |  | 
|---|
| 652 | # | 
|---|
| 653 | # convert @type( ... ) to @type{ ... } | 
|---|
| 654 | # | 
|---|
def no_outer_parens(filecont):
    """Convert the outer parentheses of each @type( ... ) entry to braces.

    Only the pair of parens that delimits an entry body becomes { ... };
    parens nested inside field values are left untouched.  Returns the
    rewritten file contents as one string.
    """
    # split so that every paren/brace becomes its own list element
    pieces = re.split('([(){}])', filecont)

    entry_rex = re.compile(r'@\w*')

    depth = 0        # current open-paren nesting level
    in_entry = 0     # 1 while inside an @type( ... ) body
    expect_open = 0  # 1 immediately after an @type token

    rebuilt = ''
    for piece in pieces:
        if expect_open == 1:
            # the token right after @type: a '(' here is the entry opener
            if piece == '(':
                piece = '{'
                depth = depth + 1
            else:
                in_entry = 0
            expect_open = 0

        if piece == '(':
            depth = depth + 1
        elif piece == ')':
            depth = depth - 1
            # closing the outermost paren of an @type( ... ) entry
            if in_entry == 1 and depth == 0:
                piece = '}'
                in_entry = 0
        elif entry_rex.search(piece):
            in_entry = 1
            expect_open = 1

        rebuilt = rebuilt + piece

    return rebuilt
|---|
| 695 |  | 
|---|
| 696 |  | 
|---|
| 697 | # | 
|---|
| 698 | # make all whitespace into just one space | 
|---|
| 699 | # format the bibtex file into a usable form. | 
|---|
| 700 | # | 
|---|
def bibtexwasher(filecont_source):
    """Normalize raw BibTeX input into a list of clean, one-field lines.

    Collapses whitespace runs to single spaces, drops '%' comment lines
    and blank lines, converts @type( ... ) entries to @type{ ... },
    re-splits the data so each field sits on its own line, substitutes
    @string abbreviations, and returns the result as a list of lines,
    each terminated by a newline.

    filecont_source -- iterable of raw input lines (e.g. file.readlines())
    """

    space_rex = re.compile('\s+')
    comment_rex = re.compile('\s*%')

    filecont = []

    # remove trailing and excessive whitespace
    # ignore comments
    for line in filecont_source:
        # str methods instead of the deprecated string-module functions
        # (string.strip/string.join no longer exist in Python 3, and the
        # methods behave identically here under Python 2)
        line = line.strip()
        line = space_rex.sub(' ', line)
        # ignore comments
        if not comment_rex.match(line) and line != '':
            filecont.append(' '+ line)

    filecont = ''.join(filecont)

    # the file is in one long string

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub('(=\s*{[^=]*)},', '\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub('"\s*,', '",\n', filecont)
    filecont = re.sub('=\s*([\w\d]+)\s*,', '= \g<1>,\n', filecont)
    filecont = re.sub('(@\w*)\s*({(\s*)[^,\s]*)\s*,',
                          '\n\n\g<1>\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub('"\s*}','"\n}\n', filecont)
    filecont = re.sub('}\s*,','},\n', filecont)


    filecont = re.sub('@(\w*)', '\n@\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = re.sub('{\\\&}', '&', filecont)
    filecont = re.sub('\\\&', '&', filecont)

    # do checking for open braces to get format correct
    open_brace_count = 0
    brace_split = re.split('([{}])',filecont)

    # rebuild filecont, inserting a newline before each entry-closing '}'
    filecont = ''

    for phrase in brace_split:
        if phrase == '{':
            open_brace_count = open_brace_count + 1
        elif phrase == '}':
            open_brace_count = open_brace_count - 1
            if open_brace_count == 0:
                filecont = filecont + '\n'

        filecont = filecont + phrase

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather: compact non-blank lines to the front of the list, in place.
    # Writing at index i (<= current read position) is safe while iterating.
    filecont = filecont2.splitlines()
    i=0
    j=0         # count the number of blank lines
    for line in filecont:
        # ignore blank lines
        if line == '' or line == ' ':
            j = j+1
            continue
        filecont[i] = line + '\n'
        i = i+1

    # get rid of the extra stuff at the end of the array
    # (The extra stuff are duplicates that are in the array because
    # blank lines were removed.)
    length = len( filecont)
    filecont[length-j:length] = []

    return filecont
|---|
| 783 |  | 
|---|
| 784 |  | 
|---|
| 785 | def filehandler(filepath): | 
|---|
| 786 |     try: | 
|---|
| 787 |         fd = open(filepath, 'r') | 
|---|
| 788 |         filecont_source = fd.readlines() | 
|---|
| 789 |         fd.close() | 
|---|
| 790 |     except: | 
|---|
| 791 |         print 'Could not open file:', filepath | 
|---|
| 792 |     washeddata = bibtexwasher(filecont_source) | 
|---|
| 793 |     outdata = bibtexdecoder(washeddata) | 
|---|
| 794 |     print '/**' | 
|---|
| 795 |     print '\page references References' | 
|---|
| 796 |     print | 
|---|
| 797 |     for line in outdata: | 
|---|
| 798 |         print line | 
|---|
| 799 |     print '*/' | 
|---|
| 800 |  | 
|---|
| 801 |  | 
|---|
| 802 | # main program | 
|---|
| 803 |  | 
|---|
| 804 | def main(): | 
|---|
| 805 |     import sys | 
|---|
| 806 |     if sys.argv[1:]: | 
|---|
| 807 |         filepath = sys.argv[1] | 
|---|
| 808 |     else: | 
|---|
| 809 |         print "No input file" | 
|---|
| 810 |         sys.exit() | 
|---|
| 811 |     filehandler(filepath) | 
|---|
| 812 |  | 
|---|
if __name__ == "__main__":
    main()
|---|
| 814 |  | 
|---|
| 815 |  | 
|---|
| 816 | # end python script | 
|---|