@@ -106,47 +106,113 @@ def preserve_html_and_escape_text(text):
106106 escaped_text += html .escape (text [last_end :])
107107 return escaped_text
108108
109- # markdown to html parsing ( v0.737.2)
109+ # v0.7615
def markdown_to_html(text):
    """
    Convert a simple subset of Markdown to HTML.

    Supported constructs: fenced code blocks (with an optional language
    tag), inline code, ATX headings (``#`` .. ``######``), ``[text](url)``
    links, ``**bold**`` and ``*italic*`` / ``_italic_``.

    Fragments that must not be touched by later rules (code blocks, inline
    code and link URLs) are swapped for opaque placeholders while the
    remaining rules run, and restored at the end.  The placeholders are
    fenced with control characters and contain no Markdown
    metacharacters, so the bold/italic/link rules cannot corrupt them.

    Args:
        text: Markdown source string.

    Returns:
        The text with the supported Markdown constructs replaced by HTML
        tags.  Code-block bodies, link texts and link URLs are
        HTML-escaped; all other text is passed through unescaped.
    """
    # STX/ETX fence the placeholders.  They are stripped from the input
    # first so user text can never be mistaken for a placeholder.
    open_mark, close_mark = "\x02", "\x03"
    text = text.replace(open_mark, "").replace(close_mark, "")

    protected = []

    def protect(fragment):
        """Stash a finished HTML fragment and return its placeholder."""
        protected.append(fragment)
        return f"{open_mark}{len(protected) - 1}{close_mark}"

    # 1) Fenced code blocks: escape the body and take it out of play so
    #    heading/bold/italic rules never see it.
    def stash_codeblock(match):
        language = match.group(1)
        escaped_code = html.escape(match.group(2).strip())
        if language:
            return protect(
                f'<pre><code class="language-{language}">{escaped_code}</code></pre>'
            )
        return protect(f'<pre><code>{escaped_code}</code></pre>')

    text = re.sub(r'```(\w+)?\n([\s\S]*?)```', stash_codeblock, text)

    # 2) Inline code: protected as well, otherwise `a*b*c` or `snake_case`
    #    inside backticks would be italicised.
    text = re.sub(
        r'`([^`]*)`',
        lambda match: protect(f'<code>{match.group(1)}</code>'),
        text,
    )

    # 3) Headings: one to six '#' at the start of a line.  The separator is
    #    "[^\S\n]+" (whitespace except newline) so a bare '#' line cannot
    #    swallow the following line as its heading text.
    text = re.sub(
        r'^#{1,6}[^\S\n]+(.*)',
        r'➤ <b>\1</b>',
        text,
        flags=re.MULTILINE,
    )

    # 4) Links of the form [text](url).  Only the URL is protected: the
    #    link text stays subject to bold/italic, but underscores or
    #    asterisks in the URL must not end up as tags inside the href.
    def replace_markdown_link(match):
        href = protect(html.escape(match.group(2)))
        return f'<a href="{href}">{html.escape(match.group(1))}</a>'

    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text)

    # 5) Bold first, so '**' is not consumed by the single-'*' italic rule.
    text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)

    # 6) Italics: both *text* and _text_.
    text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)
    text = re.sub(r'_(.*?)_', r'<i>\1</i>', text)

    # 7) Restore protected fragments, newest first: a fragment can only
    #    contain placeholders created before it, so reverse order resolves
    #    nested placeholders as well.
    for index in reversed(range(len(protected))):
        text = text.replace(f"{open_mark}{index}{close_mark}", protected[index])

    return text
174+
175+ # # markdown to html parsing (v0.737.2)
176+ # def markdown_to_html(text):
177+ # try:
178+ # # Handle the code blocks with optional language specification first
179+ # def replace_codeblock(match):
180+ # codeblock = match.group(2) # Get the actual code inside the block
181+ # language = match.group(1) # Get the language identifier
182+ # escaped_code = html.escape(codeblock.strip())
183+ # if language:
184+ # return f'<pre><code class="language-{language}">{escaped_code}</code></pre>'
185+ # else:
186+ # return f'<pre><code>{escaped_code}</code></pre>'
187+
188+ # # Replace code blocks with <pre><code> tags
189+ # text = re.sub(r'```(\w+)?\n([\s\S]*?)```', replace_codeblock, text)
190+
191+ # # Now handle Markdown links and convert them to HTML
192+ # def replace_markdown_link(match):
193+ # link_text = match.group(1) # The text to display
194+ # url = match.group(2) # The URL
195+ # return f'<a href="{html.escape(url)}">{html.escape(link_text)}</a>'
196+
197+ # # Replace Markdown links [text](url) with HTML <a> tags
198+ # text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text)
199+
200+ # # Handle inline code and other markdown elements
201+ # text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
202+ # text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)
203+ # text = re.sub(r'_(.*?)_', r'<i>\1</i>', text)
204+ # text = re.sub(r'`([^`]*)`', r'<code>\1</code>', text)
205+ # text = re.sub(r'######\s*(.*)', r'➤ <b>\1</b>', text)
206+ # text = re.sub(r'#####\s*(.*)', r'➤ <b>\1</b>', text)
207+ # text = re.sub(r'####\s*(.*)', r'➤ <b>\1</b>', text)
208+ # text = re.sub(r'###\s*(.*)', r'➤ <b>\1</b>', text)
209+ # text = re.sub(r'##\s*(.*)', r'➤ <b>\1</b>', text)
210+ # text = re.sub(r'#\s*(.*)', r'➤ <b>\1</b>', text)
211+
212+ # return text
213+
214+ # except Exception as e:
215+ # return str(e)
150216
151217# Check and update the global rate limit.
152218def check_global_rate_limit (max_requests_per_minute , global_request_count , rate_limit_reset_time ):
0 commit comments