diff --git a/README.md b/README.md index 9bbb0ca..15dbfcf 100644 --- a/README.md +++ b/README.md @@ -240,6 +240,8 @@ If you run into any issues, consult the logs or reach out on the repository's [I --- # Changelog +- v0.7615 - Parsing improvements + - Improved text formatting & escaping in complex markdown vs. html cases - v0.7614 - Better stock market data fetching from Yahoo Finance - Changes made to `src/api_get_stock_prices_yfinance.py` - => More accurate ticker symbol searches, fallbacks, multi-day data etc. diff --git a/config/config.ini b/config/config.ini index df0e838..0a29864 100644 --- a/config/config.ini +++ b/config/config.ini @@ -125,7 +125,8 @@ Enabled = True # The preferred, more capable model to use by default (e.g., gpt-4o, gpt-4.5-preview). # This model will be used until its daily token limit (PremiumTokenLimit) is reached. -PremiumModel = gpt-4o +# PremiumModel = gpt-4o +PremiumModel = gpt-4.1 # The cheaper model to switch to when the PremiumTokenLimit is reached (e.g., gpt-4o-mini). # This model has its own daily token limit (MiniTokenLimit). 
diff --git a/src/main.py b/src/main.py index 8047c4f..c4a43ab 100755 --- a/src/main.py +++ b/src/main.py @@ -6,7 +6,7 @@ # https://github.com/FlyingFathead/TelegramBot-OpenAI-API # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # version of this program -version_number = "0.7614" +version_number = "0.7615" # Add the project root directory to Python's path import sys diff --git a/src/modules.py b/src/modules.py index 33a0606..4d53271 100644 --- a/src/modules.py +++ b/src/modules.py @@ -106,47 +106,113 @@ def preserve_html_and_escape_text(text): escaped_text += html.escape(text[last_end:]) return escaped_text -# markdown to html parsing (v0.737.2) +# v0.7615 def markdown_to_html(text): - try: - # Handle the code blocks with optional language specification first - def replace_codeblock(match): - codeblock = match.group(2) # Get the actual code inside the block - language = match.group(1) # Get the language identifier - escaped_code = html.escape(codeblock.strip()) - if language: - return f'
{escaped_code}'
- else:
- return f'{escaped_code}'
-
- # Replace code blocks with tags
- text = re.sub(r'```(\w+)?\n([\s\S]*?)```', replace_codeblock, text)
-
- # Now handle Markdown links and convert them to HTML
- def replace_markdown_link(match):
- link_text = match.group(1) # The text to display
- url = match.group(2) # The URL
- return f'{html.escape(link_text)}'
-
- # Replace Markdown links [text](url) with HTML tags
- text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text)
-
- # Handle inline code and other markdown elements
- text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
- text = re.sub(r'\*(.*?)\*', r'\1', text)
- text = re.sub(r'_(.*?)_', r'\1', text)
- text = re.sub(r'`([^`]*)`', r'\1', text)
- text = re.sub(r'######\s*(.*)', r'➤ \1', text)
- text = re.sub(r'#####\s*(.*)', r'➤ \1', text)
- text = re.sub(r'####\s*(.*)', r'➤ \1', text)
- text = re.sub(r'###\s*(.*)', r'➤ \1', text)
- text = re.sub(r'##\s*(.*)', r'➤ \1', text)
- text = re.sub(r'#\s*(.*)', r'➤ \1', text)
-
- return text
+ """
+ Convert a simple subset of Markdown to HTML,
+ ensuring that code blocks are extracted first so they
+ don't get accidentally transformed by heading/bold/italic rules.
+ """
+ # 1) Extract code blocks into placeholders
+ code_blocks = []
+
+ def extract_codeblock(match):
+ language = match.group(1) or "" # i.e. "python"
+ code_body = match.group(2) # the code text
+ code_blocks.append((language, code_body))
+ placeholder_index = len(code_blocks) - 1
+ # Return a placeholder token like [CODEBLOCK_0]
+ return f"[CODEBLOCK_{placeholder_index}]"
+
+ # Regex: triple backticks with optional language
+ # [\s\S] matches any character, newlines included, so no re.DOTALL flag is needed
+ text = re.sub(
+ r'```(\w+)?\n([\s\S]*?)```',
+ extract_codeblock,
+ text
+ )
+
+ # 2) Now do the normal Markdown parsing on whatever’s left (outside code blocks)
+
+ # Headings: match only at the start of a line (^ combined with re.MULTILINE)
+ text = re.sub(r'^(######)\s+(.*)', r'➤ \2', text, flags=re.MULTILINE)
+ text = re.sub(r'^(#####)\s+(.*)', r'➤ \2', text, flags=re.MULTILINE)
+ text = re.sub(r'^(####)\s+(.*)', r'➤ \2', text, flags=re.MULTILINE)
+ text = re.sub(r'^(###)\s+(.*)', r'➤ \2', text, flags=re.MULTILINE)
+ text = re.sub(r'^(##)\s+(.*)', r'➤ \2', text, flags=re.MULTILINE)
+ text = re.sub(r'^#\s+(.*)', r'➤ \1', text, flags=re.MULTILINE)
+
+ # Links of the form [text](url)
+ def replace_markdown_link(m):
+ link_text = m.group(1)
+ url = m.group(2)
+ # Escape any HTML entities in the URL or text
+ return f'{html.escape(link_text)}'
+ text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text)
+
+ # Bold
+ text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
+
+ # Italics: handle both `*text*` and `_text_`
+ text = re.sub(r'\*(.*?)\*', r'\1', text)
+ text = re.sub(r'_(.*?)_', r'\1', text)
+
+ # Inline code with single backticks
+ text = re.sub(r'`([^`]*)`', r'\1', text)
+
+ # 3) Re‐insert the code blocks
+ for i, (language, code_body) in enumerate(code_blocks):
+ escaped_code = html.escape(code_body.strip())
+ if language:
+ block_html = f'{escaped_code}
'
+ else:
+ block_html = f'{escaped_code}
'
+ # Replace [CODEBLOCK_i] with the final block
+ text = text.replace(f"[CODEBLOCK_{i}]", block_html, 1)
- except Exception as e:
- return str(e)
+ return text
+
+# # markdown to html parsing (v0.737.2)
+# def markdown_to_html(text):
+# try:
+# # Handle the code blocks with optional language specification first
+# def replace_codeblock(match):
+# codeblock = match.group(2) # Get the actual code inside the block
+# language = match.group(1) # Get the language identifier
+# escaped_code = html.escape(codeblock.strip())
+# if language:
+# return f'{escaped_code}
'
+# else:
+# return f'{escaped_code}
'
+
+# # Replace code blocks with tags
+# text = re.sub(r'```(\w+)?\n([\s\S]*?)```', replace_codeblock, text)
+
+# # Now handle Markdown links and convert them to HTML
+# def replace_markdown_link(match):
+# link_text = match.group(1) # The text to display
+# url = match.group(2) # The URL
+# return f'{html.escape(link_text)}'
+
+# # Replace Markdown links [text](url) with HTML tags
+# text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text)
+
+# # Handle inline code and other markdown elements
+# text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
+# text = re.sub(r'\*(.*?)\*', r'\1', text)
+# text = re.sub(r'_(.*?)_', r'\1', text)
+# text = re.sub(r'`([^`]*)`', r'\1', text)
+# text = re.sub(r'######\s*(.*)', r'➤ \1', text)
+# text = re.sub(r'#####\s*(.*)', r'➤ \1', text)
+# text = re.sub(r'####\s*(.*)', r'➤ \1', text)
+# text = re.sub(r'###\s*(.*)', r'➤ \1', text)
+# text = re.sub(r'##\s*(.*)', r'➤ \1', text)
+# text = re.sub(r'#\s*(.*)', r'➤ \1', text)
+
+# return text
+
+# except Exception as e:
+# return str(e)
# Check and update the global rate limit.
def check_global_rate_limit(max_requests_per_minute, global_request_count, rate_limit_reset_time):