Blame - third_party/markdown.py - monorail-avm99963

blob: f6a9a87cbe13367b8aabae77087aebe626612e1c [file] [log] [blame]

Copybara	854996b	2021-09-07 19:36:02 +0000	[diff] [blame]	1	#!/usr/bin/python
				2	"""markdown.py: A Markdown-styled-text to HTML converter in Python.
				3
				4	Usage:
				5	./markdown.py textfile.markdown
				6
				7	Calling:
				8	import markdown
				9	somehtml = markdown.markdown(sometext)
				10
				11	For other versions of markdown, see:
				12	http://www.freewisdom.org/projects/python-markdown/
				13	http://en.wikipedia.org/wiki/Markdown
				14	"""
				15
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import hashlib
import re
import sys

try:
    import md5  # Python 2 only; superseded by hashlib, kept for code not yet ported.
except ImportError:  # Python 3 has no md5 module.
    md5 = None
				23
				24	__version__ = '1.0.1-2' # port of 1.0.1
				25	__license__ = "GNU GPL 2"
				26	__author__ = [
				27	'John Gruber <http://daringfireball.net/>',
				28	'Tollef Fog Heen <tfheen@err.no>',
				29	'Aaron Swartz <me@aaronsw.com>'
				30	]
				31
				32	def htmlquote(text):
				33	"""Encodes `text` for raw use in HTML."""
				34	text = text.replace("&", "&") # Must be done first!
				35	text = text.replace("<", "<")
				36	text = text.replace(">", ">")
				37	text = text.replace("'", "'")
				38	text = text.replace('"', """)
				39	return text
				40
				41	def semirandom(seed):
				42	x = 0
				43	for c in md5.new(seed).digest(): x += ord(c)
				44	return x / (255*16.)
				45
				46	class _Markdown:
				47	emptyelt = " />"
				48	tabwidth = 4
				49
				50	escapechars = '\\`*_{}[]()>#+-.!'
				51	escapetable = {}
				52	for char in escapechars:
				53	escapetable[char] = md5.new(char).hexdigest()
				54
				55	r_multiline = re.compile("\n{2,}")
				56	r_stripspace = re.compile(r"^[ \t]+$", re.MULTILINE)
				57	def parse(self, text):
				58	self.urls = {}
				59	self.titles = {}
				60	self.html_blocks = {}
				61	self.list_level = 0
				62
				63	text = text.replace("\r\n", "\n")
				64	text = text.replace("\r", "\n")
				65	text += "\n\n"
				66	text = self._Detab(text)
				67	text = self.r_stripspace.sub("", text)
				68	text = self._HashHTMLBlocks(text)
				69	text = self._StripLinkDefinitions(text)
				70	text = self._RunBlockGamut(text)
				71	text = self._UnescapeSpecialChars(text)
				72	return text
				73
				74	r_StripLinkDefinitions = re.compile(r"""
				75	^[ ]{0,%d}\[(.+)\]: # id = $1
				76	[ \t]\n?[ \t]
				77	<?(\S+?)>? # url = $2
				78	[ \t]\n?[ \t]
				79	(?:
				80	(?<=\s) # lookbehind for whitespace
				81	[\"\(] # " is backlashed so it colorizes our code right
				82	(.+?) # title = $3
				83	[\"\)]
				84	[ \t]*
				85	)? # title is optional
				86	(?:\n+\|\Z)
				87	""" % (tabwidth-1), re.MULTILINE\|re.VERBOSE)
				88	def _StripLinkDefinitions(self, text):
				89	def replacefunc(matchobj):
				90	(t1, t2, t3) = matchobj.groups()
				91	#@@ case sensitivity?
				92	self.urls[t1.lower()] = self._EncodeAmpsAndAngles(t2)
				93	if t3 is not None:
				94	self.titles[t1.lower()] = t3.replace('"', '"')
				95	return ""
				96
				97	text = self.r_StripLinkDefinitions.sub(replacefunc, text)
				98	return text
				99
				100	blocktagsb = r"p\|div\|h[1-6]\|blockquote\|pre\|table\|dl\|ol\|ul\|script\|math"
				101	blocktagsa = blocktagsb + "\|ins\|del"
				102
				103	r_HashHTMLBlocks1 = re.compile(r"""
				104	( # save in $1
				105	^ # start of line (with /m)
				106	<(%s) # start tag = $2
				107	\b # word break
				108	(.\n)? # any number of lines, minimally matching
				109	</\2> # the matching end tag
				110	[ \t]* # trailing spaces/tabs
				111	(?=\n+\|$) # followed by a newline or end of document
				112	)
				113	""" % blocktagsa, re.MULTILINE \| re.VERBOSE)
				114
				115	r_HashHTMLBlocks2 = re.compile(r"""
				116	( # save in $1
				117	^ # start of line (with /m)
				118	<(%s) # start tag = $2
				119	\b # word break
				120	(.\n)? # any number of lines, minimally matching
				121	.*</\2> # the matching end tag
				122	[ \t]* # trailing spaces/tabs
				123	(?=\n+\|\Z) # followed by a newline or end of document
				124	)
				125	""" % blocktagsb, re.MULTILINE \| re.VERBOSE)
				126
				127	r_HashHR = re.compile(r"""
				128	(?:
				129	(?<=\n\n) # Starting after a blank line
				130	\| # or
				131	\A\n? # the beginning of the doc
				132	)
				133	( # save in $1
				134	[ ]{0,%d}
				135	<(hr) # start tag = $2
				136	\b # word break
				137	([^<>])*? #
				138	/?> # the matching end tag
				139	[ \t]*
				140	(?=\n{2,}\|\Z)# followed by a blank line or end of document
				141	)
				142	""" % (tabwidth-1), re.VERBOSE)
				143	r_HashComment = re.compile(r"""
				144	(?:
				145	(?<=\n\n) # Starting after a blank line
				146	\| # or
				147	\A\n? # the beginning of the doc
				148	)
				149	( # save in $1
				150	[ ]{0,%d}
				151	(?:
				152	<!
				153	(--.?--\s)+
				154	>
				155	)
				156	[ \t]*
				157	(?=\n{2,}\|\Z)# followed by a blank line or end of document
				158	)
				159	""" % (tabwidth-1), re.VERBOSE)
				160
				161	def _HashHTMLBlocks(self, text):
				162	def handler(m):
				163	key = md5.new(m.group(1)).hexdigest()
				164	self.html_blocks[key] = m.group(1)
				165	return "\n\n%s\n\n" % key
				166
				167	text = self.r_HashHTMLBlocks1.sub(handler, text)
				168	text = self.r_HashHTMLBlocks2.sub(handler, text)
				169	oldtext = text
				170	text = self.r_HashHR.sub(handler, text)
				171	text = self.r_HashComment.sub(handler, text)
				172	return text
				173
				174	#@@@ wrong!
				175	r_hr1 = re.compile(r'^[ ]{0,2}([ ]?\[ ]?){3,}[ \t]$', re.M)
				176	r_hr2 = re.compile(r'^[ ]{0,2}([ ]?-[ ]?){3,}[ \t]*$', re.M)
				177	r_hr3 = re.compile(r'^[ ]{0,2}([ ]?_[ ]?){3,}[ \t]*$', re.M)
				178
				179	def _RunBlockGamut(self, text):
				180	text = self._DoHeaders(text)
				181	for x in [self.r_hr1, self.r_hr2, self.r_hr3]:
				182	text = x.sub("\n<hr%s\n" % self.emptyelt, text);
				183	text = self._DoLists(text)
				184	text = self._DoCodeBlocks(text)
				185	text = self._DoBlockQuotes(text)
				186
				187	# We did this in parse()
				188	# to escape the source
				189	# now it's stuff _we_ made
				190	# so we don't wrap it in <p>s.
				191	text = self._HashHTMLBlocks(text)
				192	text = self._FormParagraphs(text)
				193	return text
				194
				195	r_NewLine = re.compile(" {2,}\n")
				196	def _RunSpanGamut(self, text):
				197	text = self._DoCodeSpans(text)
				198	text = self._EscapeSpecialChars(text)
				199	text = self._DoImages(text)
				200	text = self._DoAnchors(text)
				201	text = self._DoAutoLinks(text)
				202	text = self._EncodeAmpsAndAngles(text)
				203	text = self._DoItalicsAndBold(text)
				204	text = self.r_NewLine.sub(" <br%s\n" % self.emptyelt, text)
				205	return text
				206
				207	def _EscapeSpecialChars(self, text):
				208	tokens = self._TokenizeHTML(text)
				209	text = ""
				210	for cur_token in tokens:
				211	if cur_token[0] == "tag":
				212	cur_token[1] = cur_token[1].replace('', self.escapetable[""])
				213	cur_token[1] = cur_token[1].replace('_', self.escapetable["_"])
				214	text += cur_token[1]
				215	else:
				216	text += self._EncodeBackslashEscapes(cur_token[1])
				217	return text
				218
				219	r_DoAnchors1 = re.compile(
				220	r""" ( # wrap whole match in $1
				221	\[
				222	(.*?) # link text = $2
				223	# [for bracket nesting, see below]
				224	\]
				225
				226	[ ]? # one optional space
				227	(?:\n[ ]*)? # one optional newline followed by spaces
				228
				229	\[
				230	(.*?) # id = $3
				231	\]
				232	)
				233	""", re.S\|re.VERBOSE)
				234	r_DoAnchors2 = re.compile(
				235	r""" ( # wrap whole match in $1
				236	\[
				237	(.*?) # link text = $2
				238	\]
				239	\( # literal paren
				240	[ \t]*
				241	<?(.+?)>? # href = $3
				242	[ \t]*
				243	( # $4
				244	([\'\"]) # quote char = $5
				245	(.*?) # Title = $6
				246	\5 # matching quote
				247	)? # title is optional
				248	\)
				249	)
				250	""", re.S\|re.VERBOSE)
				251	def _DoAnchors(self, text):
				252	# We here don't do the same as the perl version, as python's regex
				253	# engine gives us no way to match brackets.
				254
				255	def handler1(m):
				256	whole_match = m.group(1)
				257	link_text = m.group(2)
				258	link_id = m.group(3).lower()
				259	if not link_id: link_id = link_text.lower()
				260	title = self.titles.get(link_id, None)
				261
				262
				263	if self.urls.has_key(link_id):
				264	url = self.urls[link_id]
				265	url = url.replace("", self.escapetable[""])
				266	url = url.replace("_", self.escapetable["_"])
				267	res = '<a href="%s"' % htmlquote(url)
				268
				269	if title:
				270	title = title.replace("", self.escapetable[""])
				271	title = title.replace("_", self.escapetable["_"])
				272	res += ' title="%s"' % htmlquote(title)
				273	res += ">%s</a>" % htmlquote(link_text)
				274	else:
				275	res = whole_match
				276	return res
				277
				278	def handler2(m):
				279	whole_match = m.group(1)
				280	link_text = m.group(2)
				281	url = m.group(3)
				282	title = m.group(6)
				283
				284	url = url.replace("", self.escapetable[""])
				285	url = url.replace("_", self.escapetable["_"])
				286	res = '''<a href="%s"''' % htmlquote(url)
				287
				288	if title:
				289	title = title.replace('"', '"')
				290	title = title.replace("", self.escapetable[""])
				291	title = title.replace("_", self.escapetable["_"])
				292	res += ' title="%s"' % htmlquote(title)
				293	res += ">%s</a>" % htmlquote(link_text)
				294	return res
				295
				296	text = self.r_DoAnchors1.sub(handler1, text)
				297	text = self.r_DoAnchors2.sub(handler2, text)
				298	return text
				299
				300	r_DoImages1 = re.compile(
				301	r""" ( # wrap whole match in $1
				302	!\[
				303	(.*?) # alt text = $2
				304	\]
				305
				306	[ ]? # one optional space
				307	(?:\n[ ]*)? # one optional newline followed by spaces
				308
				309	\[
				310	(.*?) # id = $3
				311	\]
				312
				313	)
				314	""", re.VERBOSE\|re.S)
				315
				316	r_DoImages2 = re.compile(
				317	r""" ( # wrap whole match in $1
				318	!\[
				319	(.*?) # alt text = $2
				320	\]
				321	\( # literal paren
				322	[ \t]*
				323	<?(\S+?)>? # src url = $3
				324	[ \t]*
				325	( # $4
				326	([\'\"]) # quote char = $5
				327	(.*?) # title = $6
				328	\5 # matching quote
				329	[ \t]*
				330	)? # title is optional
				331	\)
				332	)
				333	""", re.VERBOSE\|re.S)
				334
				335	def _DoImages(self, text):
				336	def handler1(m):
				337	whole_match = m.group(1)
				338	alt_text = m.group(2)
				339	link_id = m.group(3).lower()
				340
				341	if not link_id:
				342	link_id = alt_text.lower()
				343
				344	alt_text = alt_text.replace('"', """)
				345	if self.urls.has_key(link_id):
				346	url = self.urls[link_id]
				347	url = url.replace("", self.escapetable[""])
				348	url = url.replace("_", self.escapetable["_"])
				349	res = '''<img src="%s" alt="%s"''' % (htmlquote(url), htmlquote(alt_text))
				350	if self.titles.has_key(link_id):
				351	title = self.titles[link_id]
				352	title = title.replace("", self.escapetable[""])
				353	title = title.replace("_", self.escapetable["_"])
				354	res += ' title="%s"' % htmlquote(title)
				355	res += self.emptyelt
				356	else:
				357	res = whole_match
				358	return res
				359
				360	def handler2(m):
				361	whole_match = m.group(1)
				362	alt_text = m.group(2)
				363	url = m.group(3)
				364	title = m.group(6) or ''
				365
				366	alt_text = alt_text.replace('"', """)
				367	title = title.replace('"', """)
				368	url = url.replace("", self.escapetable[""])
				369	url = url.replace("_", self.escapetable["_"])
				370	res = '<img src="%s" alt="%s"' % (htmlquote(url), htmlquote(alt_text))
				371	if title is not None:
				372	title = title.replace("", self.escapetable[""])
				373	title = title.replace("_", self.escapetable["_"])
				374	res += ' title="%s"' % htmlquote(title)
				375	res += self.emptyelt
				376	return res
				377
				378	text = self.r_DoImages1.sub(handler1, text)
				379	text = self.r_DoImages2.sub(handler2, text)
				380	return text
				381
				382	r_DoHeaders = re.compile(r"^(\#{1,6})[ \t](.+?)[ \t]\#*\n+", re.VERBOSE\|re.M)
				383	def _DoHeaders(self, text):
				384	def findheader(text, c, n):
				385	textl = text.split('\n')
				386	for i in range(len(textl)):
				387	if i >= len(textl): continue
				388	count = textl[i].strip().count(c)
				389	if count > 0 and count == len(textl[i].strip()) and textl[i+1].strip() == '' and textl[i-1].strip() != '':
				390	textl = textl[:i] + textl[i+1:]
				391	textl[i-1] = '<h'+n+'>'+self._RunSpanGamut(textl[i-1])+'</h'+n+'>'
				392	textl = textl[:i] + textl[i+1:]
				393	text = '\n'.join(textl)
				394	return text
				395
				396	def handler(m):
				397	level = len(m.group(1))
				398	header = self._RunSpanGamut(m.group(2))
				399	return "<h%s>%s</h%s>\n\n" % (level, header, level)
				400
				401	text = findheader(text, '=', '1')
				402	text = findheader(text, '-', '2')
				403	text = self.r_DoHeaders.sub(handler, text)
				404	return text
				405
				406	rt_l = r"""
				407	(
				408	(
				409	[ ]{0,%d}
				410	([*+-]\|\d+[.])
				411	[ \t]+
				412	)
				413	(?:.+?)
				414	(
				415	\Z
				416	\|
				417	\n{2,}
				418	(?=\S)
				419	(?![ \t]* ([*+-]\|\d+[.])[ \t]+)
				420	)
				421	)
				422	""" % (tabwidth - 1)
				423	r_DoLists = re.compile('^'+rt_l, re.M \| re.VERBOSE \| re.S)
				424	r_DoListsTop = re.compile(
				425	r'(?:\A\n?\|(?<=\n\n))'+rt_l, re.M \| re.VERBOSE \| re.S)
				426
				427	def _DoLists(self, text):
				428	def handler(m):
				429	list_type = "ol"
				430	if m.group(3) in [ "*", "-", "+" ]:
				431	list_type = "ul"
				432	listn = m.group(1)
				433	listn = self.r_multiline.sub("\n\n\n", listn)
				434	res = self._ProcessListItems(listn)
				435	res = "<%s>\n%s</%s>\n" % (list_type, res, list_type)
				436	return res
				437
				438	if self.list_level:
				439	text = self.r_DoLists.sub(handler, text)
				440	else:
				441	text = self.r_DoListsTop.sub(handler, text)
				442	return text
				443
				444	r_multiend = re.compile(r"\n{2,}\Z")
				445	r_ProcessListItems = re.compile(r"""
				446	(\n)? # leading line = $1
				447	(^[ \t]*) # leading whitespace = $2
				448	([*+-]\|\d+[.]) [ \t]+ # list marker = $3
				449	((?:.+?) # list item text = $4
				450	(\n{1,2}))
				451	(?= \n* (\Z \| \2 ([*+-]\|\d+[.]) [ \t]+))
				452	""", re.VERBOSE \| re.M \| re.S)
				453
				454	def _ProcessListItems(self, text):
				455	self.list_level += 1
				456	text = self.r_multiend.sub("\n", text)
				457
				458	def handler(m):
				459	item = m.group(4)
				460	leading_line = m.group(1)
				461	leading_space = m.group(2)
				462
				463	if leading_line or self.r_multiline.search(item):
				464	item = self._RunBlockGamut(self._Outdent(item))
				465	else:
				466	item = self._DoLists(self._Outdent(item))
				467	if item[-1] == "\n": item = item[:-1] # chomp
				468	item = self._RunSpanGamut(item)
				469	return "<li>%s</li>\n" % item
				470
				471	text = self.r_ProcessListItems.sub(handler, text)
				472	self.list_level -= 1
				473	return text
				474
				475	r_DoCodeBlocks = re.compile(r"""
				476	(?:\n\n\|\A)
				477	( # $1 = the code block
				478	(?:
				479	(?:[ ]{%d} \| \t) # Lines must start with a tab or equiv
				480	.*\n+
				481	)+
				482	)
				483	((?=^[ ]{0,%d}\S)\|\Z) # Lookahead for non-space/end of doc
				484	""" % (tabwidth, tabwidth), re.M \| re.VERBOSE)
				485	def _DoCodeBlocks(self, text):
				486	def handler(m):
				487	codeblock = m.group(1)
				488	codeblock = self._EncodeCode(self._Outdent(codeblock))
				489	codeblock = self._Detab(codeblock)
				490	codeblock = codeblock.lstrip("\n")
				491	codeblock = codeblock.rstrip()
				492	res = "\n\n<pre><code>%s\n</code></pre>\n\n" % codeblock
				493	return res
				494
				495	text = self.r_DoCodeBlocks.sub(handler, text)
				496	return text
				497	r_DoCodeSpans = re.compile(r"""
				498	(`+) # $1 = Opening run of `
				499	(.+?) # $2 = The code block
				500	(?<!`)
				501	\1 # Matching closer
				502	(?!`)
				503	""", re.I\|re.VERBOSE)
				504	def _DoCodeSpans(self, text):
				505	def handler(m):
				506	c = m.group(2)
				507	c = c.strip()
				508	c = self._EncodeCode(c)
				509	return "<code>%s</code>" % c
				510
				511	text = self.r_DoCodeSpans.sub(handler, text)
				512	return text
				513
				514	def _EncodeCode(self, text):
				515	text = text.replace("&","&")
				516	text = text.replace("<","<")
				517	text = text.replace(">",">")
				518	for c in "*_{}[]\\":
				519	text = text.replace(c, self.escapetable[c])
				520	return text
				521
				522
				523	r_DoBold = re.compile(r"(\\\|__) (?=\S) (.+?[_]) (?<=\S) \1", re.VERBOSE \| re.S)
				524	r_DoItalics = re.compile(r"(\*\|_) (?=\S) (.+?) (?<=\S) \1", re.VERBOSE \| re.S)
				525	def _DoItalicsAndBold(self, text):
				526	text = self.r_DoBold.sub(r"<strong>\2</strong>", text)
				527	text = self.r_DoItalics.sub(r"<em>\2</em>", text)
				528	return text
				529
				530	r_start = re.compile(r"^", re.M)
				531	r_DoBlockQuotes1 = re.compile(r"^[ \t]*>[ \t]?", re.M)
				532	r_DoBlockQuotes2 = re.compile(r"^[ \t]+$", re.M)
				533	r_DoBlockQuotes3 = re.compile(r"""
				534	( # Wrap whole match in $1
				535	(
				536	^[ \t]*>[ \t]? # '>' at the start of a line
				537	.+\n # rest of the first line
				538	(.+\n)* # subsequent consecutive lines
				539	\n* # blanks
				540	)+
				541	)""", re.M \| re.VERBOSE)
				542	r_protectpre = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
				543	r_propre = re.compile(r'^ ', re.M)
				544
				545	def _DoBlockQuotes(self, text):
				546	def prehandler(m):
				547	return self.r_propre.sub('', m.group(1))
				548
				549	def handler(m):
				550	bq = m.group(1)
				551	bq = self.r_DoBlockQuotes1.sub("", bq)
				552	bq = self.r_DoBlockQuotes2.sub("", bq)
				553	bq = self._RunBlockGamut(bq)
				554	bq = self.r_start.sub(" ", bq)
				555	bq = self.r_protectpre.sub(prehandler, bq)
				556	return "<blockquote>\n%s\n</blockquote>\n\n" % bq
				557
				558	text = self.r_DoBlockQuotes3.sub(handler, text)
				559	return text
				560
				561	r_tabbed = re.compile(r"^([ \t]*)")
				562	def _FormParagraphs(self, text):
				563	text = text.strip("\n")
				564	grafs = self.r_multiline.split(text)
				565
				566	for g in range(len(grafs)):
				567	t = grafs[g].strip() #@@?
				568	if not self.html_blocks.has_key(t):
				569	t = self._RunSpanGamut(t)
				570	t = self.r_tabbed.sub(r"<p>", t)
				571	t += "</p>"
				572	grafs[g] = t
				573
				574	for g in range(len(grafs)):
				575	t = grafs[g].strip()
				576	if self.html_blocks.has_key(t):
				577	grafs[g] = self.html_blocks[t]
				578
				579	return "\n\n".join(grafs)
				580
				581	r_EncodeAmps = re.compile(r"&(?!#?[xX]?(?:[0-9a-fA-F]+\|\w+);)")
				582	r_EncodeAngles = re.compile(r"<(?![a-z/?\$!])")
				583	def _EncodeAmpsAndAngles(self, text):
				584	text = self.r_EncodeAmps.sub("&", text)
				585	text = self.r_EncodeAngles.sub("<", text)
				586	return text
				587
				588	def _EncodeBackslashEscapes(self, text):
				589	for char in self.escapechars:
				590	text = text.replace("\\" + char, self.escapetable[char])
				591	return text
				592
				593	r_link = re.compile(r"<((https?\|ftp):[^\'\">\s]+)>", re.I)
				594	r_email = re.compile(r"""
				595	<
				596	(?:mailto:)?
				597	(
				598	[-.\w]+
				599	\@
				600	[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
				601	)
				602	>""", re.VERBOSE\|re.I)
				603	def _DoAutoLinks(self, text):
				604	text = self.r_link.sub(r'<a href="\1">\1</a>', text)
				605
				606	def handler(m):
				607	l = m.group(1)
				608	return self._EncodeEmailAddress(self._UnescapeSpecialChars(l))
				609
				610	text = self.r_email.sub(handler, text)
				611	return text
				612
				613	r_EncodeEmailAddress = re.compile(r">.+?:")
				614	def _EncodeEmailAddress(self, text):
				615	encode = [
				616	lambda x: "&#%s;" % ord(x),
				617	lambda x: "&#x%X;" % ord(x),
				618	lambda x: x
				619	]
				620
				621	text = "mailto:" + text
				622	addr = ""
				623	for c in text:
				624	if c == ':': addr += c; continue
				625
				626	r = semirandom(addr)
				627	if r < 0.45:
				628	addr += encode[1](c)
				629	elif r > 0.9 and c != '@':
				630	addr += encode[2](c)
				631	else:
				632	addr += encode[0](c)
				633
				634	text = '<a href="%s">%s</a>' % (addr, addr)
				635	text = self.r_EncodeEmailAddress.sub('>', text)
				636	return text
				637
				638	def _UnescapeSpecialChars(self, text):
				639	for key in self.escapetable.keys():
				640	text = text.replace(self.escapetable[key], key)
				641	return text
				642
				643	tokenize_depth = 6
				644	tokenize_nested_tags = '\|'.join([r'(?:<[a-z/!$](?:[^<>]'] * tokenize_depth) + (')>)' tokenize_depth)
				645	r_TokenizeHTML = re.compile(
				646	r"""(?: <! ( -- .? -- \s )+ > ) \| # comment
				647	(?: <\? .*? \?> ) \| # processing instruction
				648	%s # nested tags
				649	""" % tokenize_nested_tags, re.I\|re.VERBOSE)
				650	def _TokenizeHTML(self, text):
				651	pos = 0
				652	tokens = []
				653	matchobj = self.r_TokenizeHTML.search(text, pos)
				654	while matchobj:
				655	whole_tag = matchobj.string[matchobj.start():matchobj.end()]
				656	sec_start = matchobj.end()
				657	tag_start = sec_start - len(whole_tag)
				658	if pos < tag_start:
				659	tokens.append(["text", matchobj.string[pos:tag_start]])
				660
				661	tokens.append(["tag", whole_tag])
				662	pos = sec_start
				663	matchobj = self.r_TokenizeHTML.search(text, pos)
				664
				665	if pos < len(text):
				666	tokens.append(["text", text[pos:]])
				667	return tokens
				668
				669	r_Outdent = re.compile(r"""^(\t\|[ ]{1,%d})""" % tabwidth, re.M)
				670	def _Outdent(self, text):
				671	text = self.r_Outdent.sub("", text)
				672	return text
				673
				674	def _Detab(self, text): return text.expandtabs(self.tabwidth)
				675
				676	def Markdown(args, kw): return _Markdown().parse(args, **kw)
				677	markdown = Markdown
				678
				679	if __name__ == '__main__':
				680	if len(sys.argv) > 1:
				681	print(Markdown(open(sys.argv[1]).read()))
				682	else:
				683	print(Markdown(sys.stdin.read()))