COIN-OR::LEMON - Graph Library

Context Navigation

source: lemon-main/scripts/bib2dox.py @ 826:02109e17027f

Last change on this file since 826:02109e17027f was 754:2de0fc630899, checked in by Peter Kovacs <kpeter@…>, 15 years ago
Handle url fields in bib2dox.py (#184) and modify the bibtex file using url fields.
Property exe set to ``*
File size: 25.6 KB

Line
1	#!/usr/bin/env /usr/local/Python/bin/python2.1
2	"""
3	BibTeX to Doxygen converter
4	Usage: python bib2dox.py bibfile.bib > bibfile.dox
5
6	This code is the modification of the BibTeX to XML converter
7	by Vidar Bronken Gundersen et al. See the original copyright notices below.
8
9	**********************************************************************
10
11	Decoder for bibliographic data, BibTeX
12	Usage: python bibtex2xml.py bibfile.bib > bibfile.xml
13
14	v.8
15	(c)2002-06-23 Vidar Bronken Gundersen
16	http://bibtexml.sf.net/
17	Reuse approved as long as this notification is kept.
18	Licence: GPL.
19
20	Contributions/thanks to:
21	Egon Willighagen, http://sf.net/projects/jreferences/
22	Richard Mahoney (for providing a test case)
23
24	Edited by Sara Sprenkle to be more robust and handle more bibtex features.
25	(c) 2003-01-15
26
27	1. Changed bibtex: tags to bibxml: tags.
28	2. Use xmlns:bibxml="http://bibtexml.sf.net/"
29	3. Allow spaces between @type and first {
30	4. "author" fields with multiple authors split by " and "
31	are put in separate xml "bibxml:author" tags.
32	5. Option for Titles: words are capitalized
33	only if first letter in title or capitalized inside braces
34	6. Removes braces from within field values
35	7. Ignores comments in bibtex file (including @comment{ or % )
36	8. Replaces some special latex tags, e.g., replaces ~ with ' '
37	9. Handles bibtex @string abbreviations
38	--> includes bibtex's default abbreviations for months
39	--> does concatenation of abbr # " more " and " more " # abbr
40	10. Handles @type( ... ) or @type{ ... }
41	11. The keywords field is split on , or ; and put into separate xml
42	"bibxml:keywords" tags
43	12. Ignores @preamble
44
45	Known Limitations
46	1. Does not transform Latex encoding like math mode and special
47	latex symbols.
48	2. Does not parse author fields into first and last names.
49	E.g., It does not do anything special to an author whose name is
50	in the form LAST_NAME, FIRST_NAME
51	In "author" tag, will show up as
52	<bibxml:author>LAST_NAME, FIRST_NAME</bibxml:author>
53	3. Does not handle "crossref" fields other than to print
54	<bibxml:crossref>...</bibxml:crossref>
55	4. Does not inform user of the input's format errors. You just won't
56	be able to transform the file later with XSL
57
58	You will have to manually edit the XML output if you need to handle
59	these (and unknown) limitations.
60
61	"""
62
63	import string, re
64
# set of valid name characters (a regex character class; used for the
# names of @string abbreviations)
valid_name_chars = r'[\w\-:]'

#
# define global regular expression variables
#
# All patterns are raw strings so that regex escapes such as \s, \w and
# \\ reach the re module unchanged on every Python version.
#

# separator between the names of an author/editor list
author_rex = re.compile(r'\s+and\s+')
# any single brace
rembraces_rex = re.compile(r'[{}]')
# a brace-protected phrase (captured, so that re.split keeps it)
capitalize_rex = re.compile(r'({[^}]*})')

# used by bibtexkeywords(data)
keywords_rex = re.compile(r'[,;]')

# used by concat_line(line): the '#' operator with optional whitespace
# around it
concatsplit_rex = re.compile(r'\s*#\s*')

# split on {, }, or " in verify_out_of_braces
delimiter_rex = re.compile(r'([{}"])', re.I)

# "<field> = <rest of the line>": group 1 is the field name, group 2 is
# everything after the equals sign
field_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# "<field> = <data>,": group 2 is the undelimited data up to the first comma
data_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')

# the LaTeX command \url{...}; group 1 is the address
url_rex = re.compile(r'\\url\{([^}]*)\}')

#
# styles for html formatting
#
divstyle = 'margin-top: -4ex; margin-left: 8em;'
93
#
# return the string parameter with every match of url_rex (a LaTeX
# \url{...} command) replaced by an html link that points to, and
# displays, the captured address
#
def transformurls(str):
    link_template = r'<a href="\1">\1</a>'
    return url_rex.sub(link_template, str)
99
#
# return the string parameter with every match of rembraces_rex
# (i.e. every brace) deleted
#
def removebraces(str):
    without_braces = rembraces_rex.sub('', str)
    return without_braces
105
#
# latex-specific replacements
# (do this after braces were removed)
#
# The table below is applied from top to bottom.  The accented letters
# are written as html character entities: they keep this source file
# pure ASCII and they are the &xyz; form that copychars() decodes.
#
_LATEX_REPLACEMENTS = (
    ('~', ' '),
    ('\\\'a', '&aacute;'),
    ('\\"a', '&auml;'),
    ('\\\'e', '&eacute;'),
    ('\\"e', '&euml;'),
    ('\\\'i', '&iacute;'),
    ('\\"i', '&iuml;'),
    ('\\\'o', '&oacute;'),
    ('\\"o', '&ouml;'),
    ('\\\'u', '&uacute;'),
    ('\\"u', '&uuml;'),
    ('\\H o', '&otilde;'),
    ('\\H u', '&uuml;'),   # &utilde; does not exist
    ('\\\'A', '&Aacute;'),
    ('\\"A', '&Auml;'),
    ('\\\'E', '&Eacute;'),
    ('\\"E', '&Euml;'),
    ('\\\'I', '&Iacute;'),
    ('\\"I', '&Iuml;'),
    ('\\\'O', '&Oacute;'),
    ('\\"O', '&Ouml;'),
    ('\\\'U', '&Uacute;'),
    ('\\"U', '&Uuml;'),
    ('\\H O', '&Otilde;'),
    ('\\H U', '&Uuml;'),   # &Utilde; does not exist
)

#
# @param line --> string whose braces were already removed
# @return the string with '~' turned into a space and the LaTeX accent
# commands \'x, \"x and \H x (x = a, e, i, o, u in either case; \H only
# for o and u) turned into html entities
#
def latexreplacements(line):
    for latex, html in _LATEX_REPLACEMENTS:
        line = line.replace(latex, html)
    return line
138
#
# copy characters from a string decoding html expressions (&xyz;)
#
# Scanning starts at index ifrom and stops at the end of the string or
# as soon as count characters were taken.  An ASCII letter is taken as
# it is, an expression &xyz; contributes the character that follows the
# '&' and the rest of it (up to the ';') is skipped; every other
# character is ignored.
#
def copychars(str, ifrom, count):
    pieces = []
    taken = 0
    index = ifrom
    in_entity = False
    end = len(str)
    while index < end and taken < count:
        ch = str[index]
        if ch == '&':
            # start of an expression: its base letter is the next character
            in_entity = True
            if index + 1 < end:
                pieces.append(str[index + 1])
            taken += 1
            index += 2
            continue
        if in_entity:
            # skip the remainder of the expression up to the closing ';'
            if ch == ';':
                in_entity = False
        elif 'A' <= ch <= 'Z' or 'a' <= ch <= 'z':
            pieces.append(ch)
            taken += 1
        index += 1

    return ''.join(pieces)
165
166
#
# Handle a list of authors (separated by 'and').
# It gives back a dictionary of the following values:
# - num: the number of authors,
# - list: the list of the author names (braces removed, LaTeX accents
#   replaced, "Xyz, A. B." turned into "A. B. Xyz"),
# - text: the bibtex text (separated by commas and/or 'and')
# - abbrev: abbreviation that can be used for indicate the
#   bibliography entries (the first three letters of the last word of
#   the name for a single author, otherwise the first letter of the
#   last word of each name)
#
def bibtexauthor(data):
    names = []
    for raw_name in author_rex.split(data):
        # general transformations
        name = latexreplacements(removebraces(raw_name.strip()))
        # transform "Xyz, A. B." to "A. B. Xyz"
        pos = name.find(',')
        if pos != -1:
            name = name[pos+1:].strip() + ' ' + name[:pos].strip()
        names.append(name)

    # The names are joined directly (no placeholder character is put
    # between them), so a name may contain any character.
    if len(names) == 1:
        text = names[0]
    elif len(names) == 2:
        text = names[0] + ' and ' + names[1]
    else:
        text = ', '.join(names[:-1]) + ', and ' + names[-1]

    if len(names) == 1:
        count = 3
    else:
        count = 1
    abbrev = ''
    for name in names:
        abbrev += copychars(name, name.rfind(' ') + 1, count)

    result = {}
    result['list'] = names
    result['num'] = len(names)
    result['text'] = text
    result['abbrev'] = abbrev
    return result
209
210
#
# data = title string
# @return the title with normalized capitalization: the phrases inside
# braces keep their capitalization (only the braces are removed), the
# text before the first braced phrase loses its leading spaces and gets
# a capital first letter, every other piece of text is made lower case
#
def capitalizetitle(data):
    pieces = []
    for index, phrase in enumerate(capitalize_rex.split(data)):
        check = phrase.lstrip()

        if check.startswith('{'):
            # keep phrase's capitalization the same
            pieces.append(removebraces(phrase))
        elif index == 0:
            # first word --> capitalize first letter (after spaces)
            pieces.append(check.capitalize())
        else:
            pieces.append(phrase.lower())

    return ''.join(pieces)
235
236
#
# @return the bibtex for the title
# @param data --> title string
# @param entrytype --> type of the entry; the titles of 'book' and
# 'inbook' entries keep their capitalization, all the other titles are
# passed through capitalizetitle()
# braces are removed from title
#
def bibtextitle(data, entrytype):
    text = data.strip()
    if entrytype not in ('book', 'inbook'):
        text = capitalizetitle(text)
    return removebraces(text)
249
250
#
# function to compare entry lists
#
# Only the first items (the sort keys) are compared.
# @return -1, 0 or 1 if x[0] is less than, equal to or greater than y[0]
#
def entry_cmp(x, y):
    # the difference of the two comparisons gives the same result as the
    # cmp() builtin, which is not available in Python 3
    return (x[0] > y[0]) - (x[0] < y[0])
256
257
#
# patterns used by bibtexdecoder(filecont_source)
#

# want @<alphanumeric chars><spaces>{<spaces><any chars>,
_pubtype_rex = re.compile(r'@(\w*)\s*\{\s*(.*),')
# a line that starts with the closing brace of an entry
_endtype_rex = re.compile(r'\}\s*$')
# <field> = <rest of the line>
_fieldname_rex = re.compile(r'\s*(\w*)\s*=\s*(.*)')
# field = {data} entries
_bracedata_rex = re.compile(r'\s*(\w*)\s*=\s*\{(.*)\},?')
# field = "data" entries
_quotedata_rex = re.compile(r'\s*(\w*)\s*=\s*"(.*)",?')
# field = data entries
_baredata_rex = re.compile(r'\s*(\w*)\s*=\s*([^,]*),?')


#
# return true iff the entry has a non-empty value for the field
#
def _has_value(entrycont, field):
    return field in entrycont and entrycont[field] != ''


#
# put the value of the field (if the entry has it) between <em> tags
#
def _emphasize(entrycont, field):
    if field in entrycont:
        entrycont[field] = '<em>' + entrycont[field] + '</em>'


#
# split a "field = data" line
# @return the pair of the lower case field name and the data without its
# {} or "" delimiters; two empty strings if the line is not a field
#
def _parse_field(line):
    for pattern in (_bracedata_rex, _quotedata_rex, _baredata_rex):
        if pattern.match(line):
            field = _fieldname_rex.sub(r'\g<1>', line).lower()
            data = pattern.sub(r'\g<2>', line)
            return field, data
    return '', ''


#
# generate the doxygen code of one entry
# @return a list: the sort key, the bibtex key and the id of the entry
# followed by the text lines of the entry
#
def _format_entry(entrytype, entryid, entrycont):
    # entry type related formattings; fields that are missing from the
    # entry are skipped
    if entrytype in ('book', 'inbook'):
        _emphasize(entrycont, 'title')
        if 'author' not in entrycont and 'editor' in entrycont:
            editors = entrycont['editor']
            if editors['num'] > 1:
                editors['text'] += ', editors'
            else:
                editors['text'] += ', editor'
            entrycont['author'] = editors
    elif entrytype == 'article':
        _emphasize(entrycont, 'journal')
    elif entrytype in ('inproceedings', 'incollection', 'conference'):
        _emphasize(entrycont, 'booktitle')
    elif entrytype == 'techreport':
        if 'type' not in entrycont:
            entrycont['type'] = 'Technical report'
    elif entrytype == 'mastersthesis':
        entrycont['type'] = 'Master\'s thesis'
    elif entrytype == 'phdthesis':
        entrycont['type'] = 'PhD thesis'

    if _has_value(entrycont, 'pages'):
        entrycont['pages'] = entrycont['pages'].replace('--', '-')

    lines = []
    if _has_value(entrycont, 'author'):
        lines.append(entrycont['author']['text'] + '.')
    if _has_value(entrycont, 'title'):
        lines.append(entrycont['title'] + '.')
    if _has_value(entrycont, 'journal'):
        lines.append(entrycont['journal'] + ',')
    if _has_value(entrycont, 'booktitle'):
        lines.append('In ' + entrycont['booktitle'] + ',')
    if _has_value(entrycont, 'type'):
        eline = entrycont['type']
        if _has_value(entrycont, 'number'):
            eline += ' ' + entrycont['number']
        lines.append(eline + ',')
    for field in ('institution', 'publisher', 'school', 'address'):
        if _has_value(entrycont, field):
            lines.append(entrycont[field] + ',')
    if _has_value(entrycont, 'edition'):
        lines.append(entrycont['edition'] + ' edition,')
    if _has_value(entrycont, 'howpublished'):
        lines.append(entrycont['howpublished'] + ',')
    if _has_value(entrycont, 'volume'):
        eline = entrycont['volume']
        if _has_value(entrycont, 'number'):
            eline += '(' + entrycont['number'] + ')'
        if _has_value(entrycont, 'pages'):
            eline += ':' + entrycont['pages']
        lines.append(eline + ',')
    elif _has_value(entrycont, 'pages'):
        lines.append('pages ' + entrycont['pages'] + ',')
    if _has_value(entrycont, 'year'):
        if _has_value(entrycont, 'month'):
            lines.append(entrycont['month'] + ' ' + entrycont['year'] + '.')
        else:
            lines.append(entrycont['year'] + '.')
    if _has_value(entrycont, 'note'):
        lines.append(entrycont['note'] + '.')
    if _has_value(entrycont, 'url'):
        lines.append(entrycont['url'] + '.')

    # generate keys for sorting and for the output
    sortkey = ''
    if 'author' in entrycont:
        for author in entrycont['author']['list']:
            sortkey += copychars(author, author.rfind(' ')+1, len(author))
        bibkey = entrycont['author']['abbrev']
    else:
        bibkey = 'x'
    if 'year' in entrycont:
        sortkey += entrycont['year']
        bibkey += entrycont['year'][-2:]
    if 'title' in entrycont:
        sortkey += entrycont['title']
    if 'key' in entrycont:
        sortkey = entrycont['key'] + sortkey
        bibkey = entrycont['key']

    return [sortkey, bibkey, entryid] + lines


#
# return the doxygen code for the transformed "filecont_source"
#
# @param filecont_source --> list of lines: each entry starts with an
# "@type{id," line, has one "field = data" line for each of its fields
# and ends with a line holding only "}"
# @return the list of the output lines: for each entry (ordered by their
# sort keys) a \section line with the id and the bibtex key of the
# entry, then the text of the entry inside a <div>.  Entries that have
# the same bibtex key get the suffixes a, b, c, ...
#
def bibtexdecoder(filecont_source):
    entries = []
    entrycont = None      # the fields of the entry being read
    entrytype = ''
    entryid = ''

    for line in filecont_source:
        if line.endswith('\n'):
            line = line[:-1]

        # encode character entities
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')

        # start entry: publication type (store for later use)
        if _pubtype_rex.match(line):
            entrycont = {}
            entrytype = _pubtype_rex.sub(r'\g<1>', line).lower()
            entryid = _pubtype_rex.sub(r'\g<2>', line)

        # lines that precede the first entry cannot be processed
        elif entrycont is None:
            continue

        # end entry if just a }
        elif _endtype_rex.match(line):
            entries.append(_format_entry(entrytype, entryid, entrycont))

        else:
            # field, publication info
            field, data = _parse_field(line)

            if field == 'url':
                data = '\\url{' + data.strip() + '}'

            if field in ('author', 'editor'):
                entrycont[field] = bibtexauthor(data)
                continue

            if field == 'title':
                value = bibtextitle(data, entrytype)
            elif field != '':
                value = removebraces(transformurls(data.strip()))
            else:
                value = ''

            if value != '':
                entrycont[field] = latexreplacements(value)

    # sort entries (by their sort keys)
    entries.sort(key=lambda entry: entry[0])

    # count the bibtex keys
    keytable = {}
    for entry in entries:
        keytable[entry[1]] = keytable.get(entry[1], 0) + 1
    counttable = dict.fromkeys(keytable, 0)

    # generate output
    output = []
    for entry in entries:
        # generate output key from the bibtex key
        bibkey = entry[1]
        entryid = entry[2]
        if keytable[bibkey] == 1:
            outkey = bibkey
        else:
            outkey = bibkey + chr(97 + counttable[bibkey])
            counttable[bibkey] += 1

        # append the entry code to the output
        output.append('\\section ' + entryid + ' [' + outkey + ']')
        output.append('<div style="' + divstyle + '">')
        output.extend(entry[3:])
        output.append('</div>')
        output.append('')

    return output
468
469
#
# return 1 iff abbr is in line but not inside braces or quotes
# assumes that abbr appears only once on the line (out of braces and quotes)
#
# The line is cut at every {, } and "; abbr is looked for (as a whole
# word, ignoring case) in the pieces that are outside of all braces
# and quotes.  Otherwise 0 is returned.
#
def verify_out_of_braces(line, abbr):
    tokens = delimiter_rex.split(line)
    word_rex = re.compile('\\b' + abbr + '\\b', re.I)

    depth = 0
    quoted = False

    for token in tokens:
        if token == '{':
            depth += 1
        elif token == '}':
            depth -= 1
        elif token == '"':
            quoted = not quoted
        elif depth == 0 and not quoted and word_rex.search(token):
            return 1

    return 0
498
499
#
# a line in the form phrase1 # phrase2 # ... # phrasen
# is returned as phrase1 phrase2 ... phrasen
# with the correct punctuation
# Bug: Doesn't always work with multiple abbreviations plugged in
#
# Only the part after the equals sign is processed.  The delimiters
# between the phrases are dropped, an opening " of the first phrase
# becomes { and a closing " of the last phrase becomes }.
#
def concat_line(line):
    field = field_rex.sub(r'\g<1>', line)
    rest = field_rex.sub(r'\g<2>', line)

    pieces = [field + ' =']
    phrases = concatsplit_rex.split(rest)
    last = len(phrases) - 1

    for index, phrase in enumerate(phrases):
        phrase = phrase.strip()

        # opening delimiter
        if index == 0:
            if phrase.startswith('"'):
                phrase = phrase.replace('"', '{', 1)
        elif phrase.startswith('"') or phrase.startswith('{'):
            phrase = phrase[1:]

        # closing delimiter
        if index != last:
            if phrase.endswith('"') or phrase.endswith('}'):
                phrase = phrase[:-1]
        elif phrase.endswith('"'):
            phrase = phrase[:-1] + '}'
        elif phrase.endswith('",'):
            phrase = phrase[:-2] + '},'

        # if phrase did have \#, add the \# back
        if phrase.endswith('\\'):
            phrase = phrase + '#'

        pieces.append(phrase)

    return ' '.join(pieces)
545
546
#
# return the pattern that finds the abbreviation (as a whole word,
# ignoring case); group 1 is the optional comma that follows it
#
def _abbr_rex(name):
    return re.compile('\\b' + name + '(,?)\\b', re.I)


#
# return a replacement function for re.sub that gives back the value of
# an abbreviation followed by the matched comma.  A function is used
# because the value may contain backslashes (LaTeX commands), which
# would be taken for escape sequences in a replacement string.
#
def _abbr_expander(value):
    def expand(match):
        return value + match.group(1)
    return expand


#
# substitute abbreviations into filecont
# @param filecont_source - string of data from file
# @return the data as a string without the blank lines and without the
# lines of the @string, @comment and @preamble items; in the remaining
# lines the abbreviations (the month names of bibtex and the ones
# defined by @string) are replaced by their values and the
# # concatenations are carried out
#
def bibtex_replace_abbreviations(filecont_source):
    # These are defined in bibtex, so we'll define them too
    months = [('jan', 'January'), ('feb', 'February'), ('mar', 'March'),
              ('apr', 'April'), ('may', 'May'), ('jun', 'June'),
              ('jul', 'July'), ('aug', 'August'), ('sep', 'September'),
              ('oct', 'October'), ('nov', 'November'), ('dec', 'December')]

    # the known abbreviations: (name, value, pattern) triples
    abbreviations = [(name, value, _abbr_rex(name)) for name, value in months]
    known_names = [name for name, value in months]

    abbrdef_rex = re.compile(r'\s*@string\s*\{\s*(' + valid_name_chars +
                             r'*)\s*=(.*)', re.I)

    comment_rex = re.compile(r'@comment\s*\{', re.I)
    preamble_rex = re.compile(r'@preamble\s*\{', re.I)

    waiting_for_end_string = False
    kept_lines = []

    for line in filecont_source.splitlines():
        if line == ' ' or line == '':
            continue

        # the line that closes a skipped item is dropped, too
        if waiting_for_end_string:
            if '}' in line:
                waiting_for_end_string = False
                continue

        if abbrdef_rex.search(line):
            abbr = abbrdef_rex.sub(r'\g<1>', line)

            if abbr not in known_names:
                val = abbrdef_rex.sub(r'\g<2>', line).strip()
                known_names.append(abbr)
                abbreviations.append((abbr, val, _abbr_rex(abbr)))
            waiting_for_end_string = True
            continue

        if comment_rex.search(line) or preamble_rex.search(line):
            waiting_for_end_string = True
            continue

        # replace subsequent abbreviations with the value
        for name, value, pattern in abbreviations:
            if pattern.search(line):
                if verify_out_of_braces(line, name) == 1:
                    line = pattern.sub(_abbr_expander(value), line)
                # Check for # concatenations
                if concatsplit_rex.search(line):
                    line = concat_line(line)

        kept_lines.append(line + '\n')

    filecont2 = ''.join(kept_lines)

    # Do one final pass over file

    # make sure that didn't end up with {" or }" after the substitution
    filecont2 = filecont2.replace('{"', '{{')
    filecont2 = filecont2.replace('"}', '}}')

    afterquotevalue_rex = re.compile(r'"\s*,\s*')
    afterbrace_rex = re.compile(r'"\s*\}')
    afterbracevalue_rex = re.compile(r'(=\s*\{[^=]*)\},\s*')

    # add new lines to data that changed because of abbreviation substitutions
    filecont2 = afterquotevalue_rex.sub('",\n', filecont2)
    filecont2 = afterbrace_rex.sub('"\n}', filecont2)
    filecont2 = afterbracevalue_rex.sub('\\g<1>},\n', filecont2)

    return filecont2
646
#
# convert @type( ... ) to @type{ ... }
#
# Only the parenthesis that directly follows an @type and the one that
# closes it are converted, the parentheses inside the entry are kept.
#
def no_outer_parens(filecont):
    at_rex = re.compile(r'@\w*')

    rebuilt = []
    depth = 0              # number of the open parentheses
    in_type = False        # inside an entry that was opened by '('
    expect_open = False    # the previous piece held an @type

    for token in re.split('([(){}])', filecont):
        if expect_open:
            expect_open = False
            if token == '(':
                token = '{'
                depth += 1
            else:
                in_type = False

        if token == '(':
            depth += 1
        elif token == ')':
            depth -= 1
            if in_type and depth == 0:
                token = '}'
                in_type = False
        elif at_rex.search(token):
            in_type = True
            expect_open = True

        rebuilt.append(token)

    return ''.join(rebuilt)
690
691
#
# make all whitespace into just one space
# format the bibtex file into a usable form.
#
# @param filecont_source --> the lines of the bibtex file
# @return the list of the lines (each of them ends with a newline) of
# the reformatted file without blank lines; the abbreviations are
# already substituted by bibtex_replace_abbreviations()
#
def bibtexwasher(filecont_source):

    space_rex = re.compile(r'\s+')
    comment_rex = re.compile(r'\s*%')

    # remove trailing and excessive whitespace
    # ignore comments
    kept_lines = []
    for line in filecont_source:
        line = space_rex.sub(' ', line.strip())
        if line != '' and not comment_rex.match(line):
            kept_lines.append(' ' + line)

    # the file is in one long string
    filecont = ''.join(kept_lines)

    filecont = no_outer_parens(filecont)

    #
    # split lines according to preferred syntax scheme
    #
    filecont = re.sub(r'(=\s*\{[^=]*)\},', '\\g<1>},\n', filecont)

    # add new lines after commas that are after values
    filecont = re.sub(r'"\s*,', '",\n', filecont)
    filecont = re.sub(r'=\s*([\w\d]+)\s*,', '= \\g<1>,\n', filecont)
    filecont = re.sub(r'(@\w*)\s*(\{(\s)*[^,\s]*)\s*,',
                      '\n\n\\g<1>\\g<2>,\n', filecont)

    # add new lines after }
    filecont = re.sub(r'"\s*\}', '"\n}\n', filecont)
    filecont = re.sub(r'\}\s*,', '},\n', filecont)

    filecont = re.sub(r'@(\w*)', '\n@\\g<1>', filecont)

    # character encoding, reserved latex characters
    filecont = filecont.replace('{\\&}', '&')
    filecont = filecont.replace('\\&', '&')

    # do checking for open braces to get format correct:
    # a closing brace that ends an outermost group is put on a new line
    open_brace_count = 0
    pieces = []
    for phrase in re.split('([{}])', filecont):
        if phrase == '{':
            open_brace_count += 1
        elif phrase == '}':
            open_brace_count -= 1
            if open_brace_count == 0:
                pieces.append('\n')
        pieces.append(phrase)
    filecont = ''.join(pieces)

    filecont2 = bibtex_replace_abbreviations(filecont)

    # gather the lines, ignore blank lines
    return [line + '\n' for line in filecont2.splitlines()
            if line != '' and line != ' ']
778
779
#
# convert the bibtex file and write the doxygen page to the standard output
#
# @param filepath --> name of the bibtex file
# If the file cannot be read, a message is written to the standard error
# (the standard output holds the generated page) and the program exits
# with status 1.
#
def filehandler(filepath):
    import sys

    try:
        fd = open(filepath, 'r')
        try:
            filecont_source = fd.readlines()
        finally:
            # close the file even if reading fails
            fd.close()
    except (IOError, OSError):
        sys.stderr.write('Could not open file: ' + filepath + '\n')
        sys.exit(1)

    washeddata = bibtexwasher(filecont_source)
    outdata = bibtexdecoder(washeddata)

    write = sys.stdout.write
    write('/**\n')
    write('\\page references References\n')
    write('\n')
    for line in outdata:
        write(line + '\n')
    write('*/\n')
795
796
# main program

#
# convert the file given as the first command line argument
#
# Without an argument a message is written to the standard error (the
# standard output is meant to be redirected into the .dox file) and the
# program exits with status 1.
#
def main():
    import sys
    if len(sys.argv) < 2:
        sys.stderr.write('No input file\n')
        sys.exit(1)
    filehandler(sys.argv[1])

if __name__ == "__main__":
    main()
809
810
811	# end python script

Note: See TracBrowser for help on using the repository browser.

Download in other formats: