added script and pipeline for scheduled link rot checker

krishnaduttPanchagnula · krishnaduttPanchagnula · commit 6b72f56efac7 · 2025-06-27T10:18:30.000+05:30
Signed-off-by: krishnaduttPanchagnula &lt;krishnadutt123@gmail.com&gt;
diff --git a/.github/workflows/lin-rot-checker.yml b/.github/workflows/lin-rot-checker.yml
@@ -0,0 +1,25 @@
+name: Link rot Checker Scheduled Job
+
+on:
+  schedule:
+    # Runs once a day at 08:00 (GitHub evaluates cron schedules in UTC)
+    - cron: '0 8 * * *'
+
+jobs:
+  run-python:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      # The checker imports requests, which is not part of the standard library
+      - name: Install dependencies
+        run: python -m pip install requests
+
+      - name: Execute Python script
+        run: python hack/lin-rot-checker.py
diff --git a/hack/lin-rot-checker.py b/hack/lin-rot-checker.py
@@ -0,0 +1,27 @@
+import re
+import glob
+import requests
+
+# Regex pattern to extract URLs from markdown links [text](url)
+pattern = re.compile(r'\[.*?\]\((.*?)\)')
+
+links = []
+
+# Iterate over all .md files in all directories inclusing childern
+for filename in glob.glob("**/*.md",recursive=True):
+    with open(filename, 'r', encoding='utf-8') as f:
+        print(f"Processing file: {filename}")
+        content = f.read()
+        found_links = pattern.findall(content)
+        links.extend(found_links)
+
+print(f"Extracted links:{links}")
+
+for link in links:
+    try:
+        if requests.head(link).status_code==200:
+            print(f'{link} link is valid')
+        else:
+            print (f'{link} is not valid')
+    except requests.exceptions.RequestException as e:
+        print("The link has exceeded the dns resolution limit and failed")