From 021690744108563f00ea6272a42dcb0fb821539b Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Mon, 21 Jul 2025 07:46:27 +0530 Subject: [PATCH 01/47] INTEGRITY: Only skip processing for entries with the same status when matching key is present. --- db_functions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/db_functions.py b/db_functions.py index a41c409d..4dea844c 100644 --- a/db_functions.py +++ b/db_functions.py @@ -131,16 +131,18 @@ def insert_fileset( # Check if key/megakey already exists, if so, skip insertion (no quotes on purpose) if detection: with conn.cursor() as cursor: - cursor.execute("SELECT id FROM fileset WHERE megakey = %s", (megakey,)) + cursor.execute( + "SELECT id, status FROM fileset WHERE megakey = %s", (megakey,) + ) existing_entry = cursor.fetchone() else: with conn.cursor() as cursor: - cursor.execute("SELECT id FROM fileset WHERE `key` = %s", (key,)) + cursor.execute("SELECT id, status FROM fileset WHERE `key` = %s", (key,)) existing_entry = cursor.fetchone() - if existing_entry is not None: + if (existing_entry is not None) and (status == existing_entry["status"]): existing_entry = existing_entry["id"] with conn.cursor() as cursor: cursor.execute("SET @fileset_last = %s", (existing_entry,)) From 90be689471f2f9bdf1901b64bd839674d60be85c Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Mon, 21 Jul 2025 07:53:46 +0530 Subject: [PATCH 02/47] INTEGRITY: Logging dropped candidates missed earlier. --- db_functions.py | 51 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/db_functions.py b/db_functions.py index 4dea844c..f9430827 100644 --- a/db_functions.py +++ b/db_functions.py @@ -1707,6 +1707,8 @@ def set_process( set_to_candidate_dict = defaultdict(list) id_to_fileset_dict = defaultdict(dict) + no_candidate_logs = [] + # Deep copy to avoid changes in game_data in the loop affecting the lookup map. game_data_lookup = {fs["name"]: copy.deepcopy(fs) for fs in game_data} @@ -1755,7 +1757,6 @@ def set_process( # Separating out the matching logic for glk engine engine_name = fileset["sourcefile"].split("-")[0] - (candidate_filesets, fileset_count) = set_filter_candidate_filesets( fileset_id, fileset, fileset_count, transaction_id, engine_name, conn ) @@ -1768,20 +1769,27 @@ def set_process( fileset["description"] if "description" in fileset else "" ) log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." + console_log_text = f"Early fileset drop as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." + no_candidate_logs.append(console_log_text) create_log( escape_string(category_text), user, escape_string(log_text), conn ) dropped_early_no_candidate += 1 delete_original_fileset(fileset_id, conn) + continue id_to_fileset_dict[fileset_id] = fileset set_to_candidate_dict[fileset_id].extend(candidate_filesets) - console_message = "Candidate filtering finished." - console_log(console_message) + for console_log_text in no_candidate_logs: + console_log(console_log_text) + no_candidate_logs = [] + console_message = ( - f"{dropped_early_no_candidate} Filesets Dropped for No candidates." + f"{dropped_early_no_candidate} Filesets Dropped Early for having no candidates." ) console_log(console_message) + console_message = "Candidate filtering finished." + console_log(console_message) console_message = "Looking for duplicates..." 
console_log(console_message) @@ -1848,6 +1856,7 @@ def set_process( auto_merged_filesets, manual_merged_filesets, mismatch_filesets, + dropped_early_no_candidate, ) = set_perform_match( fileset, src, @@ -1861,13 +1870,18 @@ def set_process( mismatch_filesets, manual_merge_map, set_to_candidate_dict, + dropped_early_no_candidate, + no_candidate_logs, conn, skiplog, ) - match_count += 1 + console_log("Matching performed.") + for console_log_text in no_candidate_logs: + console_log(console_log_text) + with conn.cursor() as cursor: for fileset_id, candidates in manual_merge_map.items(): if len(candidates) == 0: @@ -1878,15 +1892,17 @@ def set_process( fileset["description"] if "description" in fileset else "" ) log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." + console_log_text = f"Fileset dropped as no candidates anymore. Name: {fileset_name}, Description: {fileset_description}." + console_log(console_log_text) create_log( escape_string(category_text), user, escape_string(log_text), conn ) dropped_early_no_candidate += 1 + manual_merged_filesets -= 1 delete_original_fileset(fileset_id, conn) else: category_text = "Manual Merge Required" log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}." - manual_merged_filesets += 1 add_manual_merge( candidates, fileset_id, @@ -1962,14 +1978,30 @@ def set_perform_match( mismatch_filesets, manual_merge_map, set_to_candidate_dict, + dropped_early_no_candidate, + no_candidate_logs, conn, skiplog, ): """ - "Performs matching for set.dat" + Performs matching for set.dat """ with conn.cursor() as cursor: - if len(candidate_filesets) == 1: + if len(candidate_filesets) == 0: + category_text = "Drop fileset - No Candidates" + fileset_name = fileset["name"] if "name" in fileset else "" + fileset_description = ( + fileset["description"] if "description" in fileset else "" + ) + log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." + console_log_text = f"Fileset dropped as no candidates anymore. Name: {fileset_name}, Description: {fileset_description}." + no_candidate_logs.append(console_log_text) + create_log( + escape_string(category_text), user, escape_string(log_text), conn + ) + dropped_early_no_candidate += 1 + delete_original_fileset(fileset_id, conn) + elif len(candidate_filesets) == 1: matched_fileset_id = candidate_filesets[0] cursor.execute( "SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,) @@ -2032,12 +2064,14 @@ def set_perform_match( elif len(candidate_filesets) > 1: manual_merge_map[fileset_id] = candidate_filesets + manual_merged_filesets += 1 return ( fully_matched_filesets, auto_merged_filesets, manual_merged_filesets, mismatch_filesets, + dropped_early_no_candidate, ) @@ -2247,6 +2281,7 @@ def set_filter_candidate_filesets( filesize = f["size"] if is_glk and (filesize in set_glk_file_size or filesize == 0): count += 1 + continue if (filename, filesize) in set_file_name_size: if filesize == -1: count += 1 From d6f17b577d42c4751da408d235cef6cff6ab386c Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Tue, 22 Jul 2025 02:04:25 +0530 Subject: [PATCH 03/47] INTEGRITY: Add fileset creation details in log when new fileset is not deleted. 
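
Whenever the incoming fileset survives (manual merge required, mismatch, or
multiple candidates) a "Created Fileset" entry is now written before the
follow-up log line, so the moderation log links back to the kept fileset.
The same few lines are repeated at each branch in the diff below; a minimal
sketch of that pattern, using the existing create_log, escape_string and
console_log helpers from db_functions.py (the wrapper function name itself is
only illustrative, not part of the patch):

    def log_fileset_creation(fileset_id, fileset, category_text, user, conn):
        # Optional metadata, read the same way the call sites do.
        fileset_name = fileset["name"] if "name" in fileset else ""
        fileset_description = (
            fileset["description"] if "description" in fileset else ""
        )
        log_text = (
            f"Created Fileset:{fileset_id}. "
            f"Name: {fileset_name} Description: {fileset_description}"
        )
        # Persist to the log table and mirror it on the console.
        create_log(escape_string(category_text), user, escape_string(log_text), conn)
        console_log(log_text)

The category_text passed at the call sites is "Uploaded from scan." or
"Uploaded from dat.", depending on the caller.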
--- db_functions.py | 77 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/db_functions.py b/db_functions.py index f9430827..18367886 100644 --- a/db_functions.py +++ b/db_functions.py @@ -183,10 +183,10 @@ def insert_fileset( log_text = f"Created Fileset:{fileset_last}, {log_text}" if src == "user": - log_text = f"Created Fileset:{fileset_last}, from user: IP {ip}, {log_text}" + log_text = f"Created Fileset:{fileset_last}, from user: IP {ip}." user = f"cli:{getpass.getuser()}" if username is None else username - if not skiplog: + if not skiplog and detection: log_last = create_log( escape_string(category_text), user, escape_string(log_text), conn ) @@ -1033,7 +1033,7 @@ def scan_process( fileset_description = ( fileset["description"] if "description" in fileset else "" ) - log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." + log_text = f"Drop fileset as no matching candidates. Name: {fileset_name} Description: {fileset_description}." create_log( escape_string(category_text), user, escape_string(log_text), conn ) @@ -1169,6 +1169,8 @@ def scan_perform_match( Put them for manual merge. """ with conn.cursor() as cursor: + fileset_name = fileset["name"] if "name" in fileset else "" + fileset_description = fileset["description"] if "description" in fileset else "" if len(candidate_filesets) == 1: matched_fileset_id = candidate_filesets[0] cursor.execute( @@ -1180,6 +1182,15 @@ def scan_perform_match( if status == "partial": # Partial filesets contain all the files, so does the scanned filesets, so this case should not ideally happen. if total_files(matched_fileset_id, conn) > total_fileset_files(fileset): + log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" + category_text = "Uploaded from scan." + create_log( + escape_string(category_text), + user, + escape_string(log_text), + conn, + ) + console_log(log_text) category_text = "Missing files" log_text = f"Missing files in Fileset:{fileset_id}. Try manual merge with Fileset:{matched_fileset_id}." add_manual_merge( @@ -1229,6 +1240,15 @@ def scan_perform_match( automatic_merged_filesets += 1 else: + log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" + category_text = "Uploaded from scan." + create_log( + escape_string(category_text), + user, + escape_string(log_text), + conn, + ) + console_log(log_text) category_text = "Manual Merge - Detection found" log_text = f"Matched with detection. Merge Fileset:{fileset_id} manually with Fileset:{matched_fileset_id}." add_manual_merge( @@ -1269,6 +1289,12 @@ def scan_perform_match( delete_original_fileset(fileset_id, conn) elif len(candidate_filesets) > 1: + log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" + category_text = "Uploaded from scan." + create_log( + escape_string(category_text), user, escape_string(log_text), conn + ) + console_log(log_text) category_text = "Manual Merge - Multiple Candidates" log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidate_filesets)}." manual_merged_filesets += 1 @@ -1768,8 +1794,8 @@ def set_process( fileset_description = ( fileset["description"] if "description" in fileset else "" ) - log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." 
- console_log_text = f"Early fileset drop as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." + log_text = f"Drop fileset as no matching candidates. Name: {fileset_name} Description: {fileset_description}." + console_log_text = f"Early fileset drop as no matching candidates. Name: {fileset_name} Description: {fileset_description}." no_candidate_logs.append(console_log_text) create_log( escape_string(category_text), user, escape_string(log_text), conn @@ -1829,7 +1855,7 @@ def set_process( fileset_description = ( fileset["description"] if "description" in fileset else "" ) - log_text = f"Drop fileset, multiple filesets mapping to single detection. Name: {fileset_name}, Description: {fileset_description}. Clashed with Fileset:{candidate} ({engine}:{gameid}-{platform}-{language})" + log_text = f"Drop fileset, multiple filesets mapping to single detection. Name: {fileset_name} Description: {fileset_description}. Clashed with Fileset:{candidate} ({engine}:{gameid}-{platform}-{language})" console_log(log_text) create_log( escape_string(category_text), user, escape_string(log_text), conn @@ -1884,15 +1910,15 @@ def set_process( with conn.cursor() as cursor: for fileset_id, candidates in manual_merge_map.items(): + fileset = id_to_fileset_dict[fileset_id] + fileset_name = fileset["name"] if "name" in fileset else "" + fileset_description = ( + fileset["description"] if "description" in fileset else "" + ) if len(candidates) == 0: category_text = "Drop fileset - No Candidates" - fileset = id_to_fileset_dict[fileset_id] - fileset_name = fileset["name"] if "name" in fileset else "" - fileset_description = ( - fileset["description"] if "description" in fileset else "" - ) - log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." - console_log_text = f"Fileset dropped as no candidates anymore. Name: {fileset_name}, Description: {fileset_description}." + log_text = f"Drop fileset as no matching candidates. Name: {fileset_name} Description: {fileset_description}." + console_log_text = f"Fileset dropped as no candidates anymore. Name: {fileset_name} Description: {fileset_description}." console_log(console_log_text) create_log( escape_string(category_text), user, escape_string(log_text), conn @@ -1901,6 +1927,12 @@ def set_process( manual_merged_filesets -= 1 delete_original_fileset(fileset_id, conn) else: + log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" + category_text = "Uploaded from dat." + create_log( + escape_string(category_text), user, escape_string(log_text), conn + ) + console_log(log_text) category_text = "Manual Merge Required" log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}." add_manual_merge( @@ -1987,14 +2019,12 @@ def set_perform_match( Performs matching for set.dat """ with conn.cursor() as cursor: + fileset_name = fileset["name"] if "name" in fileset else "" + fileset_description = fileset["description"] if "description" in fileset else "" if len(candidate_filesets) == 0: category_text = "Drop fileset - No Candidates" - fileset_name = fileset["name"] if "name" in fileset else "" - fileset_description = ( - fileset["description"] if "description" in fileset else "" - ) - log_text = f"Drop fileset as no matching candidates. Name: {fileset_name}, Description: {fileset_description}." - console_log_text = f"Fileset dropped as no candidates anymore. 
Name: {fileset_name}, Description: {fileset_description}." + log_text = f"Drop fileset as no matching candidates. Name: {fileset_name} Description: {fileset_description}." + console_log_text = f"Fileset dropped as no candidates anymore. Name: {fileset_name} Description: {fileset_description}." no_candidate_logs.append(console_log_text) create_log( escape_string(category_text), user, escape_string(log_text), conn @@ -2048,6 +2078,15 @@ def set_perform_match( delete_original_fileset(fileset_id, conn) else: + log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" + category_text = "Uploaded from dat." + create_log( + escape_string(category_text), + user, + escape_string(log_text), + conn, + ) + console_log(log_text) category_text = "Mismatch" log_text = f"Fileset:{fileset_id} mismatched with Fileset:{matched_fileset_id} with status:{status}. Try manual merge. Unmatched Files in set.dat fileset = {len(unmatched_dat_files)} Unmatched Files in candidate fileset = {len(unmatched_candidate_files)}. List of unmatched files scan.dat : {', '.join(scan_file for scan_file in unmatched_dat_files)}, List of unmatched files full fileset : {', '.join(scan_file for scan_file in unmatched_candidate_files)}" console_log(log_text) From 404898ea7e498b5a2be4b7d48d39151a049bdeb5 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Tue, 22 Jul 2025 12:52:41 +0530 Subject: [PATCH 04/47] INTEGRITY: Fix placeholder for widetable query. --- clear.py | 2 +- fileset.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/clear.py b/clear.py index acdae141..ccc5588c 100644 --- a/clear.py +++ b/clear.py @@ -19,7 +19,7 @@ def truncate_all_tables(conn): for table in tables: try: - cursor.execute("TRUNCATE TABLE %s", (table,)) + cursor.execute(f"TRUNCATE TABLE `{table}`") print(f"Table '{table}' truncated successfully") except pymysql.Error as err: print(f"Error truncating table '{table}': {err}") diff --git a/fileset.py b/fileset.py index a45556eb..d43df8f8 100644 --- a/fileset.py +++ b/fileset.py @@ -266,8 +266,7 @@ def fileset(): if widetable == "full": file_ids = [file["id"] for file in result] cursor.execute( - "SELECT file, checksum, checksize, checktype FROM filechecksum WHERE file IN (%s)", - (",".join(map(str, file_ids)),), + f"SELECT file, checksum, checksize, checktype FROM filechecksum WHERE file IN ({','.join(map(str, file_ids))})" ) checksums = cursor.fetchall() From 97961a79d147f8fa57c4a35ed972f00590386f99 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Wed, 23 Jul 2025 20:22:44 +0530 Subject: [PATCH 05/47] INTEGRITY: Redirect fileset url if id exceeds the bounds. --- fileset.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fileset.py b/fileset.py index d43df8f8..fdc7e2ab 100644 --- a/fileset.py +++ b/fileset.py @@ -118,6 +118,11 @@ def fileset(): cursor.execute("SELECT MAX(id) FROM fileset") max_id = cursor.fetchone()["MAX(id)"] + if id > max_id: + return redirect(f"/fileset?id={max_id}") + if id < min_id: + return redirect(f"/fileset?id={min_id}") + # Ensure the id is between the minimum and maximum id id = max(min_id, min(id, max_id)) From ab293d97cebdcc67ec7998e94f222787a69201b4 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Wed, 23 Jul 2025 23:14:50 +0530 Subject: [PATCH 06/47] INTEGRITY: Fix the issue for filesets with null field not being filtered. 
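
An empty filter box is mapped to the regex '.*', but in MySQL any comparison
against NULL, including `col REGEXP '.*'`, evaluates to NULL rather than TRUE,
so rows with a NULL value in the filtered column were silently dropped even
when no filter was applied. For the empty-filter case the condition builder in
pagination.py now adds an explicit IS NULL escape hatch; an excerpt of the new
logic, with the same variable names used there:

    field = f"{filters[key]}.{'id' if key == 'fileset' else key}"
    if value == ".*":
        # Empty filter: also keep rows where the column itself is NULL.
        clause = f"({field} IS NULL OR {field} REGEXP '{value}')"
    else:
        clause = f"{field} REGEXP '{value}'"
    condition += f" AND {clause}" if condition != "WHERE " else clause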
--- fileset.py | 2 +- pagination.py | 68 +++++++++++++++++++++++---------------------------- 2 files changed, 32 insertions(+), 38 deletions(-) diff --git a/fileset.py b/fileset.py index fdc7e2ab..9906e25b 100644 --- a/fileset.py +++ b/fileset.py @@ -1229,10 +1229,10 @@ def fileset_search(): order = "ORDER BY fileset.id" filters = { "fileset": "fileset", - "gameid": "game", "extra": "game", "platform": "game", "language": "game", + "gameid": "game", "megakey": "fileset", "status": "fileset", "transaction": "transactions", diff --git a/pagination.py b/pagination.py index 091384ce..5b124828 100644 --- a/pagination.py +++ b/pagination.py @@ -101,8 +101,6 @@ def create_page( num_of_pages = (num_of_results + results_per_page - 1) // results_per_page print(f"Num of results: {num_of_results}, Num of pages: {num_of_pages}") - if num_of_results == 0: - return "No results for given filters" page = int(request.args.get("page", 1)) page = max(1, min(page, num_of_pages)) @@ -118,11 +116,12 @@ def create_page( value = pymysql.converters.escape_string(value) if value == "": value = ".*" - condition += ( - f" AND {filters[key]}.{'id' if key == 'fileset' else key} REGEXP '{value}'" - if condition != "WHERE " - else f"{filters[key]}.{'id' if key == 'fileset' else key} REGEXP '{value}'" - ) + field = f"{filters[key]}.{'id' if key == 'fileset' else key}" + if value == ".*": + clause = f"({field} IS NULL OR {field} REGEXP '{value}')" + else: + clause = f"{field} REGEXP '{value}'" + condition += f" AND {clause}" if condition != "WHERE " else clause if condition == "WHERE ": condition = "" @@ -149,39 +148,32 @@ def create_page(
""" - if not results: - return "No results for given filters" - if results: - if filters: - if records_table != "log": - html += "" - else: - html += "" + if filters: + if records_table != "log": + html += "" + else: + html += "" - for key in results[0].keys(): - if key not in filters: - html += "" - continue - filter_value = request.args.get(key, "") - html += f"" - html += "" + for key in filters.keys(): + filter_value = request.args.get(key, "") + html += f"" + html += "" - html += "" - if records_table != "log": - html += "" - for key in results[0].keys(): - if key in ["fileset", "fileset_id"]: - continue - vars = "&".join( - [f"{k}={v}" for k, v in request.args.items() if k != "sort"] - ) - sort = request.args.get("sort", "") - if sort == key: - vars += f"&sort={key}-desc" - else: - vars += f"&sort={key}" - html += f"" + html += "" + if records_table != "log": + html += "" + for key in filters.keys(): + if key in ["fileset", "fileset_id"]: + continue + vars = "&".join([f"{k}={v}" for k, v in request.args.items() if k != "sort"]) + sort = request.args.get("sort", "") + if sort == key: + vars += f"&sort={key}-desc" + else: + vars += f"&sort={key}" + html += f"" + if results: counter = offset + 1 for row in results: if counter == offset + 1: # If it is the first run of the loop @@ -232,6 +224,8 @@ def create_page( counter += 1 html += "
#Fileset ID{key}#Fileset ID{key}
" + if not results: + html += "

No results for given filters

" # Pagination vars = "&".join([f"{k}={v}" for k, v in request.args.items() if k != "page"]) From 6d8c4bf7aabd936af88e0b42d3dd318b8166a4b4 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Wed, 23 Jul 2025 23:44:09 +0530 Subject: [PATCH 07/47] INTEGRITY: Join game table before engine table. --- pagination.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pagination.py b/pagination.py index 5b124828..899203a2 100644 --- a/pagination.py +++ b/pagination.py @@ -74,7 +74,11 @@ def create_page( # Handle multiple tables from_query = records_table - tables_list = list(tables) + join_order = ["game", "engine"] + tables_list = sorted( + list(tables), + key=lambda t: join_order.index(t) if t in join_order else 99, + ) if records_table not in tables_list or len(tables_list) > 1: for table in tables_list: if table == records_table: From 78cbabcb19bf65dc8e7d716111c6e9880bee0f72 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Wed, 23 Jul 2025 23:44:37 +0530 Subject: [PATCH 08/47] INTEGRITY: Add max and min pages in dashboard. --- pagination.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pagination.py b/pagination.py index 899203a2..8497ec4d 100644 --- a/pagination.py +++ b/pagination.py @@ -241,8 +241,8 @@ def create_page( html += f"" html += "" From 28d83ba0c9cfac22a7f7aaafb60cf3d8b4f2f9fa Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Fri, 25 Jul 2025 13:59:25 +0530 Subject: [PATCH 09/47] INTEGRITY: Improve merge workflow. --- fileset.py | 541 +++++++++++++----------- static/js/confirm_merge_form_handler.js | 85 ++++ 2 files changed, 389 insertions(+), 237 deletions(-) create mode 100644 static/js/confirm_merge_form_handler.js diff --git a/fileset.py b/fileset.py index 9906e25b..7ee5dd85 100644 --- a/fileset.py +++ b/fileset.py @@ -8,6 +8,7 @@ ) import pymysql.cursors import json +import html as html_lib import os from user_fileset_functions import ( user_insert_fileset, @@ -16,13 +17,14 @@ from pagination import create_page import difflib from db_functions import ( - find_matching_filesets, get_all_related_filesets, convert_log_text_to_links, user_integrity_check, db_connect, create_log, db_connect_root, + get_checksum_props, + delete_original_fileset, ) from collections import defaultdict from schema import init_database @@ -159,8 +161,7 @@ def fileset(): """ html += f"" - html += f"" - html += f"" + # html += f"" html += f""" @@ -334,7 +335,6 @@ def fileset(): # Generate the HTML for the developer actions html += "

Developer Actions

" html += f"" - html += f"" if "delete" in request.form: cursor.execute( @@ -419,121 +419,46 @@ def fileset(): html += "\n" html += "
\n" - return render_template_string(html) - finally: - connection.close() - - -@app.route("/fileset//match", methods=["GET"]) -def match_fileset_route(id): - base_dir = os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(base_dir, "mysql_config.json") - with open(config_path) as f: - mysql_cred = json.load(f) - - connection = pymysql.connect( - host=mysql_cred["servername"], - user=mysql_cred["username"], - password=mysql_cred["password"], - db=mysql_cred["dbname"], - charset="utf8mb4", - cursorclass=pymysql.cursors.DictCursor, - ) - try: - with connection.cursor() as cursor: - cursor.execute("SELECT * FROM fileset WHERE id = %s", (id,)) - fileset = cursor.fetchone() - fileset["rom"] = [] - if not fileset: - return f"No fileset found with id {id}", 404 - - cursor.execute( - "SELECT file.id, name, size, checksum, detection, detection_type FROM file WHERE fileset = %s", - (id,), - ) - result = cursor.fetchall() - file_ids = {} - for file in result: - file_ids[file["id"]] = (file["name"], file["size"]) - cursor.execute( - "SELECT file, checksum, checksize, checktype FROM filechecksum WHERE file IN (%s)", - (",".join(map(str, file_ids.keys())),), - ) - - files = cursor.fetchall() - checksum_dict = defaultdict( - lambda: {"name": "", "size": 0, "checksums": {}} - ) - - for i in files: - file_id = i["file"] - file_name, file_size = file_ids[file_id] - checksum_dict[file_name]["name"] = file_name - checksum_dict[file_name]["size"] = file_size - checksum_key = ( - f"{i['checktype']}-{i['checksize']}" - if i["checksize"] != 0 - else i["checktype"] - ) - checksum_dict[file_name]["checksums"][checksum_key] = i["checksum"] - - fileset["rom"] = [ - {"name": value["name"], "size": value["size"], **value["checksums"]} - for value in checksum_dict.values() - ] - - matched_map = find_matching_filesets(fileset, connection, fileset["status"]) - - html = f""" - - - - - - - -

Matched Filesets for Fileset: {id}

- - - - - - + # Manual merge final candidates + query = """ + SELECT + fs.*, + g.name AS game_name, + g.engine AS game_engine, + g.platform AS game_platform, + g.language AS game_language, + g.extra AS extra + FROM + fileset fs + LEFT JOIN + game g ON fs.game = g.id + JOIN + possible_merges pm ON pm.child_fileset = fs.id + WHERE pm.parent_fileset = %s """ - - for fileset_id, match_count in matched_map.items(): - if fileset_id == id: - continue - cursor.execute( - "SELECT COUNT(file.id) FROM file WHERE fileset = %s", (fileset_id,) - ) - count = cursor.fetchone()["COUNT(file.id)"] - html += f""" - - - - - - - + cursor.execute(query, (id,)) + results = cursor.fetchall() + if results: + html += """ +

Possible Merges

+
Fileset IDMatch CountActions
{fileset_id}{len(match_count)} / {count}View Details - - - - - - -
- -
-
+ """ + for result in results: + html += f""" + + + + + + + + + + """ + html += "
IDGame NamePlatformLanguageExtraDetailsAction
{result["id"]}{result["game_name"]}{result["game_platform"]}{result["game_language"]}{result["extra"]}View DetailsMerge
\n" - html += "" return render_template_string(html) finally: connection.close() @@ -755,7 +680,18 @@ def confirm_merge(id): (id,), ) source_fileset = cursor.fetchone() - print(source_fileset) + + # Select all files + file_query = """ + SELECT f.name, f.size, f.`size-r`, f.`size-rd`, + fc.checksum, fc.checksize, fc.checktype, f.detection + FROM file f + JOIN filechecksum fc ON fc.file = f.id + WHERE f.fileset = %s + """ + cursor.execute(file_query, (id,)) + source_files = cursor.fetchall() + cursor.execute( """ SELECT @@ -774,6 +710,9 @@ def confirm_merge(id): """, (target_id,), ) + target_fileset = cursor.fetchone() + cursor.execute(file_query, (target_id,)) + target_files = cursor.fetchall() def highlight_differences(source, target): diff = difflib.ndiff(source, target) @@ -806,12 +745,11 @@ def highlight_differences(source, target):

Confirm Merge

+
- + """ - target_fileset = cursor.fetchone() - for column in source_fileset.keys(): source_value = str(source_fileset[column]) target_value = str(target_fileset[column]) @@ -826,16 +764,141 @@ def highlight_differences(source, target): else: html += f"" + # Files + source_files_map = defaultdict(dict) + target_files_map = defaultdict(dict) + detection_files_set = set() + + if source_files: + for file in source_files: + checksize = file["checksize"] + if checksize != "1048576" and file["checksize"] == "1M": + checksize = "1048576" + if checksize != "1048576" and int(file["checksize"]) == 0: + checksize = "full" + check = file["checktype"] + "-" + checksize + source_files_map[file["name"].lower()][check] = file["checksum"] + source_files_map[file["name"].lower()]["size"] = file["size"] + source_files_map[file["name"].lower()]["size-r"] = file["size-r"] + source_files_map[file["name"].lower()]["size-rd"] = file["size-rd"] + + if target_files: + for file in target_files: + checksize = file["checksize"] + if checksize != "1048576" and file["checksize"] == "1M": + checksize = "1048576" + if checksize != "1048576" and int(file["checksize"]) == 0: + checksize = "full" + check = file["checktype"] + "-" + checksize + target_files_map[file["name"].lower()][check] = file["checksum"] + target_files_map[file["name"].lower()]["size"] = file["size"] + target_files_map[file["name"].lower()]["size-r"] = file["size-r"] + target_files_map[file["name"].lower()]["size-rd"] = file["size-rd"] + print(file) + if file["detection"] == 1: + detection_files_set.add(file["name"].lower()) + + print(detection_files_set) + + all_filenames = sorted( + set(source_files_map.keys()) | set(target_files_map.keys()) + ) + html += "" + for filename in all_filenames: + source_dict = source_files_map.get(filename, {}) + target_dict = target_files_map.get(filename, {}) + + html += f"" + + keys = sorted(set(source_dict.keys()) | set(target_dict.keys())) + + for key in keys: + source_value = str(source_dict.get(key, "")) + target_value = str(target_dict.get(key, "")) + + source_checked = "checked" if key in source_dict else "" + source_checksum = source_files_map[filename.lower()].get(key, "") + target_checksum = target_files_map[filename.lower()].get(key, "") + + source_val = html_lib.escape( + json.dumps( + { + "side": "source", + "filename": filename, + "prop": key, + "value": source_checksum, + "detection": "0", + } + ) + ) + if filename in detection_files_set: + target_val = html_lib.escape( + json.dumps( + { + "side": "target", + "filename": filename, + "prop": key, + "value": target_checksum, + "detection": "1", + } + ) + ) + else: + target_val = html_lib.escape( + json.dumps( + { + "side": "target", + "filename": filename, + "prop": key, + "value": target_checksum, + "detection": "0", + } + ) + ) + + if source_value != target_value: + source_highlighted, target_highlighted = highlight_differences( + source_value, target_value + ) + + html += f""" + + + + + + """ + else: + html += f""" + + + + + + """ + html += """
FieldSource FilesetTarget Fileset
FieldSource FilesetTarget Fileset
{column}{source_value}{target_value}
Files
{filename}Source FileTarget File
{key} + + {source_highlighted} + + + {target_highlighted} +
{key} + + {source_value} + + + {target_value} +
- - +
+ """ @@ -851,9 +914,11 @@ def highlight_differences(source, target): @app.route("/fileset//merge/execute", methods=["POST"]) -def execute_merge(id, source=None, target=None): - source_id = request.form["source_id"] if not source else source - target_id = request.form["target_id"] if not target else target +def execute_merge(id): + data = request.get_json() + source_id = data.get("source_id") + target_id = data.get("target_id") + options = data.get("options") base_dir = os.path.dirname(os.path.abspath(__file__)) config_path = os.path.join(base_dir, "mysql_config.json") @@ -875,145 +940,136 @@ def execute_merge(id, source=None, target=None): source_fileset = cursor.fetchone() cursor.execute("SELECT * FROM fileset WHERE id = %s", (target_id,)) - if source_fileset["status"] == "detection": + if source_fileset["status"] == "dat": cursor.execute( """ - UPDATE fileset SET - game = %s + UPDATE fileset SET status = %s, `key` = %s, - megakey = %s, `timestamp` = %s - WHERE id = %s + WHERE id = %s """, ( - source_fileset["game"], - source_fileset["status"], + "partial", source_fileset["key"], - source_fileset["megakey"], source_fileset["timestamp"], target_id, ), ) - cursor.execute("DELETE FROM file WHERE fileset = %s", (target_id,)) - - cursor.execute("SELECT * FROM file WHERE fileset = %s", (source_id,)) - source_files = cursor.fetchall() + source_filenames = set() + change_fileset_id = set() + file_details_map = defaultdict(dict) + + for file in options: + filename = file["filename"].lower() + if "detection" not in file_details_map[filename]: + file_details_map[filename]["detection"] = file["detection"] + file_details_map[filename]["detection_type"] = file["prop"] + elif ( + "detection" in file_details_map[filename] + and file_details_map[filename]["detection"] != "1" + ): + file_details_map[filename]["detection"] = file["detection"] + file_details_map[filename]["detection_type"] = file["prop"] + if file["prop"].startswith("md5"): + if "checksums" not in file_details_map[filename]: + file_details_map[filename]["checksums"] = [] + file_details_map[filename]["checksums"].append( + {"check": file["prop"], "value": file["value"]} + ) + if file["side"] == "source": + source_filenames.add(filename) - for file in source_files: - cursor.execute( + # Delete older checksums + for file in options: + filename = file["filename"].lower() + if file["side"] == "source": + cursor.execute( + """SELECT f.id as file_id FROM file f + JOIN fileset fs ON fs.id = f.fileset + WHERE f.name = %s + AND fs.id = %s""", + (filename, source_id), + ) + file_id = cursor.fetchone()["file_id"] + query = """ + DELETE FROM filechecksum + WHERE file = %s """ - INSERT INTO file (name, size, checksum, fileset, detection, `timestamp`) - VALUES (%s, %s, %s, %s, %s, NOW()) - """, - ( - file["name"].lower(), - file["size"], - file["checksum"], - target_id, - file["detection"], - ), - ) - - cursor.execute("SELECT LAST_INSERT_ID() as file_id") - new_file_id = cursor.fetchone()["file_id"] + cursor.execute(query, (file_id,)) + else: + if filename not in source_filenames: + cursor.execute( + """SELECT f.id as file_id FROM file f + JOIN fileset fs ON fs.id = f.fileset + WHERE f.name = %s + AND fs.id = %s""", + (filename, target_id), + ) + target_file_id = cursor.fetchone()["file_id"] + change_fileset_id.add(target_file_id) + for filename, details in file_details_map.items(): cursor.execute( - "SELECT * FROM filechecksum WHERE file = %s", (file["id"],) + """SELECT f.id as file_id FROM file f + JOIN fileset fs ON fs.id = f.fileset + WHERE f.name = 
%s + AND fs.id = %s""", + (filename, source_id), ) - file_checksums = cursor.fetchall() - - for checksum in file_checksums: + source_file_id = cursor.fetchone()["file_id"] + detection = ( + details["detection"] == "1" if "detection" in details else False + ) + if detection: + query = """ + UPDATE file + SET detection = 1, + detection_type = %s + WHERE id = %s + """ cursor.execute( - """ - INSERT INTO filechecksum (file, checksize, checktype, checksum) - VALUES (%s, %s, %s, %s) - """, + query, ( - new_file_id, - checksum["checksize"], - checksum["checktype"], - checksum["checksum"], + details["detection_type"], + source_file_id, ), ) - elif source_fileset["status"] in ["scan", "dat"]: - cursor.execute( - """ - UPDATE fileset SET - status = %s, - `key` = %s, - `timestamp` = %s - WHERE id = %s - """, - ( - source_fileset["status"] - if source_fileset["status"] != "dat" - else "partial", - source_fileset["key"], - source_fileset["timestamp"], - target_id, - ), - ) - cursor.execute("SELECT * FROM file WHERE fileset = %s", (source_id,)) - source_files = cursor.fetchall() - - cursor.execute("SELECT * FROM file WHERE fileset = %s", (target_id,)) - target_files = cursor.fetchall() - - target_files_dict = {} - for target_file in target_files: - cursor.execute( - "SELECT * FROM filechecksum WHERE file = %s", - (target_file["id"],), - ) - target_checksums = cursor.fetchall() - for checksum in target_checksums: - target_files_dict[checksum["checksum"]] = target_file - - for source_file in source_files: - cursor.execute( - "SELECT * FROM filechecksum WHERE file = %s", - (source_file["id"],), - ) - source_checksums = cursor.fetchall() - file_exists = False - for checksum in source_checksums: - print(checksum["checksum"]) - if checksum["checksum"] in target_files_dict.keys(): - target_file = target_files_dict[checksum["checksum"]] - source_file["detection"] = target_file["detection"] + cursor.execute( + """SELECT f.id as file_id FROM file f + JOIN fileset fs ON fs.id = f.fileset + WHERE f.name = %s + AND fs.id = %s""", + (filename, target_id), + ) + target_file_id = cursor.fetchone()["file_id"] + cursor.execute( + "DELETE FROM file WHERE id = %s", (target_file_id,) + ) + for c in details["checksums"]: + checksum = c["value"] + check = c["check"] + checksize, checktype, checksum = get_checksum_props( + check, checksum + ) + query = "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)" + cursor.execute( + query, (source_file_id, checksize, checktype, checksum) + ) - cursor.execute( - "DELETE FROM file WHERE id = %s", (target_file["id"],) - ) - file_exists = True - break - print(file_exists) cursor.execute( - """INSERT INTO file (name, size, checksum, fileset, detection, `timestamp`) VALUES ( - %s, %s, %s, %s, %s, NOW())""", - ( - source_file["name"], - source_file["size"], - source_file["checksum"], - target_id, - source_file["detection"], - ), + "UPDATE file SET fileset = %s WHERE id = %s", + (target_id, source_file_id), ) - new_file_id = cursor.lastrowid - for checksum in source_checksums: - # TODO: Handle the string - cursor.execute( - "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)", - ( - new_file_id, - checksum["checksize"], - f"{checksum['checktype']}-{checksum['checksize']}", - checksum["checksum"], - ), - ) + # for target_file_id in change_fileset_id: + # query = """ + # UPDATE file + # SET fileset = %s + # WHERE id = %s + # """ + # cursor.execute(query, (source_id, target_file_id)) cursor.execute( """ @@ -1023,6 +1079,17 
@@ def execute_merge(id, source=None, target=None): (target_id, source_id), ) + delete_original_fileset(source_id, connection) + category_text = "Manually Merged" + log_text = f"Manually merged Fileset:{source_id} with Fileset:{target_id}." + create_log(category_text, "Moderator", log_text, connection) + + query = """ + DELETE FROM possible_merges + WHERE parent_fileset = %s + """ + cursor.execute(query, (source_id,)) + connection.commit() return redirect(url_for("fileset", id=target_id)) diff --git a/static/js/confirm_merge_form_handler.js b/static/js/confirm_merge_form_handler.js new file mode 100644 index 00000000..d514091b --- /dev/null +++ b/static/js/confirm_merge_form_handler.js @@ -0,0 +1,85 @@ +document.getElementById("confirm_merge_form").addEventListener("submit", async function (e) { + e.preventDefault(); + + const form = e.target; + + source_id = form.querySelector('input[name="source_id"]').value + + const jsonData = { + source_id: source_id, + target_id: form.querySelector('input[name="target_id"]').value, + options: [] + }; + + const checkedBoxes = form.querySelectorAll('input[name="options[]"]:checked'); + jsonData.options = Array.from(checkedBoxes).map(cb => { + const optionData = JSON.parse(cb.value); + optionData.tick = "on"; + return optionData; + }); + + console.log("Data being sent:", jsonData); + + const response = await fetch(`/fileset/${source_id}/merge/execute`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(jsonData), + }); + + if (response.redirected) { + window.location.href = response.url; + } +}); + + +function checkForConflicts() { + const checkedBoxes = document.querySelectorAll('input[name="options[]"]:checked'); + const conflicts = new Map(); + + Array.from(checkedBoxes).forEach(cb => { + const option = JSON.parse(cb.value); + const key = `${option.filename}|${option.prop}`; + if (!conflicts.has(key)) { + conflicts.set(key, []); + } + conflicts.get(key).push({side: option.side, checkbox: cb}); + }); + + document.querySelectorAll('input[name="options[]"]').forEach(cb => { + cb.style.backgroundColor = ''; + cb.parentElement.style.backgroundColor = ''; + }); + + let hasConflicts = false; + + conflicts.forEach((items, key) => { + if (items.length > 1) { + + hasConflicts = true; + + items.forEach(item => { + item.checkbox.style.backgroundColor = '#ffcccc'; + item.checkbox.parentElement.style.backgroundColor = '#ffe6e6'; + }); + } + }); + + const submitButton = document.querySelector('button[type="submit"]'); + if (hasConflicts) { + submitButton.disabled = true; + submitButton.textContent = 'Resolve Conflicts First'; + submitButton.style.backgroundColor = '#ccc'; + } else { + submitButton.disabled = false; + submitButton.textContent = 'Confirm Merge'; + submitButton.style.backgroundColor = ''; + } +} + + +document.querySelectorAll('input[name="options[]"]').forEach(checkbox => { + checkbox.addEventListener('change', checkForConflicts); +}); + From 1e791850d6a70a157754a0f78ccf03005dbb2a18 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Sat, 26 Jul 2025 00:13:31 +0530 Subject: [PATCH 10/47] INTEGRITY: Add detection type for full checksums in detection entries --- db_functions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/db_functions.py b/db_functions.py index 18367886..a0b68bfd 100644 --- a/db_functions.py +++ b/db_functions.py @@ -218,6 +218,11 @@ def insert_file(file, detection, src, conn): if "md5" in file: checksum = file["md5"] checksum = checksum.split(":")[1] if ":" in checksum else 
checksum + tag = checksum.split(":")[0] if ":" in checksum else "" + checktype = "md5" + if tag != "": + checktype += "-" + tag + checksize = 0 else: for key, value in file.items(): if "md5" in key: From 2bc1e6f003c43914b247bf349fa499e53e3c2ce5 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Sat, 26 Jul 2025 00:26:52 +0530 Subject: [PATCH 11/47] INTEGRITY: Update timestamp for detection files in partial fileset conversion for set.dat --- db_functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/db_functions.py b/db_functions.py index a0b68bfd..b3320238 100644 --- a/db_functions.py +++ b/db_functions.py @@ -2846,7 +2846,8 @@ def set_populate_file(fileset, fileset_id, conn, detection): query = """ UPDATE file SET size = %s, - name = %s + name = %s, + `timestamp` = NOW() WHERE id = %s """ From 749b5a955b28a19bfff8a3772f5415ea1ae8e2ae Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Sat, 26 Jul 2025 13:01:47 +0530 Subject: [PATCH 12/47] INTEGRITY: Add extra error handling for parsing. --- dat_parser.py | 157 +++++++++++++++++++++++++++++++++--------------- db_functions.py | 74 ++++++++++++----------- 2 files changed, 147 insertions(+), 84 deletions(-) diff --git a/dat_parser.py b/dat_parser.py index a76480b2..9655d5c5 100644 --- a/dat_parser.py +++ b/dat_parser.py @@ -1,5 +1,6 @@ import re import os +import sys from db_functions import db_insert, match_fileset import argparse @@ -79,21 +80,40 @@ def match_outermost_brackets(input): depth = 0 inside_quotes = False cur_index = 0 + line_number = 1 + index_line = 1 - for i in range(len(input)): - char = input[i] + for i, char in enumerate(input): + if char == "\n": + line_number += 1 + inside_quotes = False - if char == "(" and not inside_quotes: + if char == '"' and input[i - 1] != "\\": + inside_quotes = not inside_quotes + + elif char == "(" and not inside_quotes: if depth == 0: + if "rom" in input[i - 4 : i]: + raise ValueError( + f"Missing an opening '(' for the game. Look near line {line_number}." + ) + index_line = line_number cur_index = i depth += 1 + elif char == ")" and not inside_quotes: + if depth == 0: + print(f"Warning: unmatched ')' at line {line_number}") + continue depth -= 1 if depth == 0: match = input[cur_index : i + 1] matches.append((match, cur_index)) - elif char == '"' and input[i - 1] != "\\": - inside_quotes = not inside_quotes + + if depth != 0: + raise ValueError( + f"Unmatched '(' starting at line {index_line}: possibly an unclosed block." 
+ ) return matches @@ -104,61 +124,102 @@ def parse_dat(dat_filepath): associated arrays """ if not os.path.isfile(dat_filepath): - print("File not readable") - return + print(f"Error: File does not exist or is unreadable: {dat_filepath}.") + return None - with open(dat_filepath, "r", encoding="utf-8") as dat_file: - content = dat_file.read() + try: + with open(dat_filepath, "r", encoding="utf-8") as dat_file: + content = dat_file.read() + except (IOError, UnicodeDecodeError) as e: + print(f"Error: Failed to read file {dat_filepath}: {e}") + return None header = {} game_data = [] resources = {} - matches = match_outermost_brackets(content) - # print(matches) + try: + matches = match_outermost_brackets(content) + except Exception as e: + print(f"Error: Failed to parse outer brackets in {dat_filepath}: {e}") + return None if matches: for data_segment in matches: - if ( - "clrmamepro" in content[data_segment[1] - 11 : data_segment[1]] - or "scummvm" in content[data_segment[1] - 8 : data_segment[1]] - ): - header = map_key_values(data_segment[0], header) - elif "game" in content[data_segment[1] - 5 : data_segment[1]]: - temp = {} - temp = map_key_values(data_segment[0], temp) - game_data.append(temp) - elif "resource" in content[data_segment[1] - 9 : data_segment[1]]: - temp = {} - temp = map_key_values(data_segment[0], temp) - resources[temp["name"]] = temp - # print(header, game_data, resources, dat_filepath) + try: + if ( + "clrmamepro" in content[data_segment[1] - 11 : data_segment[1]] + or "scummvm" in content[data_segment[1] - 8 : data_segment[1]] + ): + header = map_key_values(data_segment[0], header) + elif "game" in content[data_segment[1] - 5 : data_segment[1]]: + temp = {} + temp = map_key_values(data_segment[0], temp) + game_data.append(temp) + elif "resource" in content[data_segment[1] - 9 : data_segment[1]]: + temp = {} + temp = map_key_values(data_segment[0], temp) + resources[temp["name"]] = temp + except Exception as e: + print(f"Error: Failed to parse a data_segment: {e}") + return None + return header, game_data, resources, dat_filepath def main(): - parser = argparse.ArgumentParser( - description="Process DAT files and interact with the database." - ) - parser.add_argument( - "--upload", nargs="+", help="Upload DAT file(s) to the database" - ) - parser.add_argument( - "--match", nargs="+", help="Populate matching games in the database" - ) - parser.add_argument("--user", help="Username for database") - parser.add_argument("-r", help="Recurse through directories", action="store_true") - parser.add_argument("--skiplog", help="Skip logging dups", action="store_true") - - args = parser.parse_args() - - if args.upload: - for filepath in args.upload: - db_insert(parse_dat(filepath), args.user, args.skiplog) - - if args.match: - for filepath in args.match: - # print(parse_dat(filepath)[2]) - match_fileset(parse_dat(filepath), args.user, args.skiplog) + try: + parser = argparse.ArgumentParser( + description="Process DAT files and interact with the database." 
+ ) + parser.add_argument( + "--upload", nargs="+", help="Upload DAT file(s) to the database" + ) + parser.add_argument( + "--match", nargs="+", help="Populate matching games in the database" + ) + parser.add_argument("--user", help="Username for database") + parser.add_argument( + "-r", help="Recurse through directories", action="store_true" + ) + parser.add_argument("--skiplog", help="Skip logging dups", action="store_true") + + args = parser.parse_args() + + if not args.upload and not args.match: + print("Error: No action specified. Use --upload or --match") + parser.print_help() + sys.exit(1) + + if args.upload: + for filepath in args.upload: + try: + parsed_data = parse_dat(filepath) + if parsed_data is not None: + db_insert(parsed_data, args.user, args.skiplog) + else: + print(f"Error: Failed to parse file for upload: {filepath}") + except Exception as e: + print(f"Error uploading {filepath}: {e}") + continue + + if args.match: + for filepath in args.match: + try: + parsed_data = parse_dat(filepath) + if parsed_data[0] is not None: + match_fileset(parsed_data, args.user, args.skiplog) + else: + print(f"Error: Failed to parse file for matching: {filepath}") + except Exception as e: + print(f"Error matching {filepath}: {e}") + continue + + except KeyboardInterrupt: + print("Operation cancelled by user") + sys.exit(0) + except Exception as e: + print(f"Error: Unexpected error in main: {e}") + sys.exit(1) if __name__ == "__main__": diff --git a/db_functions.py b/db_functions.py index b3320238..ff5d7f8f 100644 --- a/db_functions.py +++ b/db_functions.py @@ -519,14 +519,19 @@ def db_insert(data_arr, username=None, skiplog=False): try: author = header["author"] version = header["version"] + if author != "scummvm": + raise ValueError( + f"Author needs to be scummvm for seeding. Incorrect author: {author}" + ) + except ValueError as ve: + raise ve except KeyError as e: print(f"Missing key in header: {e}") return - src = "dat" if author not in ["scan", "scummvm"] else author - - detection = src == "scummvm" - status = "detection" if detection else src + src = author + detection = True + status = "detection" conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),)) @@ -552,38 +557,35 @@ def db_insert(data_arr, username=None, skiplog=False): key = calc_key(fileset) megakey = calc_megakey(fileset) - if detection: - try: - engine_name = fileset.get("engine", "") - engineid = fileset["sourcefile"] - gameid = fileset["name"] - title = fileset.get("title", "") - extra = fileset.get("extra", "") - platform = fileset.get("platform", "") - lang = fileset.get("language", "") - except KeyError as e: - print( - f"Missing key in header: {e} for {fileset.get('name', '')}-{fileset.get('language', '')}-{fileset.get('platform', '')}" - ) - return + try: + engine_name = fileset.get("engine", "") + engineid = fileset["sourcefile"] + gameid = fileset["name"] + title = fileset.get("title", "") + extra = fileset.get("extra", "") + platform = fileset.get("platform", "") + lang = fileset.get("language", "") + except KeyError as e: + print( + f"Missing key in header: {e} for {fileset.get('name', '')}-{fileset.get('language', '')}-{fileset.get('platform', '')}" + ) + return - with conn.cursor() as cursor: - query = """ - SELECT id - FROM fileset - WHERE `key` = %s - """ - cursor.execute(query, (key,)) - existing_entry = cursor.fetchone() - if existing_entry is not None: - log_text = f"Skipping Entry as similar entry already exsits - Fileset:{existing_entry['id']}. 
Skpped entry details - engineid = {engineid}, gameid = {gameid}, platform = {platform}, language = {lang}" - create_log("Warning", user, escape_string(log_text), conn) - console_log(log_text) - continue + with conn.cursor() as cursor: + query = """ + SELECT id + FROM fileset + WHERE `key` = %s + """ + cursor.execute(query, (key,)) + existing_entry = cursor.fetchone() + if existing_entry is not None: + log_text = f"Skipping Entry as similar entry already exsits - Fileset:{existing_entry['id']}. Skpped entry details - engineid = {engineid}, gameid = {gameid}, platform = {platform}, language = {lang}" + create_log("Warning", user, escape_string(log_text), conn) + console_log(log_text) + continue - insert_game( - engine_name, engineid, title, gameid, extra, platform, lang, conn - ) + insert_game(engine_name, engineid, title, gameid, extra, platform, lang, conn) log_text = f"size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}." @@ -894,8 +896,8 @@ def match_fileset(data_arr, username=None, skiplog=False): return src = "dat" if author not in ["scan", "scummvm"] else author - detection = src == "scummvm" - source_status = "detection" if detection else src + detection = False + source_status = src conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),)) From 546dfe3a447108a152712ba9a11c949761de8c63 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Sat, 26 Jul 2025 13:29:40 +0530 Subject: [PATCH 13/47] INTEGRITY: Add commit/rollback transaction support to match_fileset and db_insert. --- db_functions.py | 346 +++++++++++++++++++++++++----------------------- 1 file changed, 184 insertions(+), 162 deletions(-) diff --git a/db_functions.py b/db_functions.py index ff5d7f8f..b2cbc558 100644 --- a/db_functions.py +++ b/db_functions.py @@ -382,19 +382,14 @@ def punycode_need_encode(orig): def create_log(category, user, text, conn): - query = f"INSERT INTO log (`timestamp`, category, user, `text`) VALUES (FROM_UNIXTIME({int(time.time())}), '{escape_string(category)}', '{escape_string(user)}', '{escape_string(text)}')" - query = "INSERT INTO log (`timestamp`, category, user, `text`) VALUES (FROM_UNIXTIME(%s), %s, %s, %s)" with conn.cursor() as cursor: try: + query = "INSERT INTO log (`timestamp`, category, user, `text`) VALUES (FROM_UNIXTIME(%s), %s, %s, %s)" cursor.execute(query, (int(time.time()), category, user, text)) - conn.commit() - except Exception as e: - conn.rollback() - print(f"Creating log failed: {e}") - log_last = None - else: cursor.execute("SELECT LAST_INSERT_ID()") log_last = cursor.fetchone()["LAST_INSERT_ID()"] + except Exception as e: + raise RuntimeError("Log creation failed") from e return log_last @@ -405,9 +400,7 @@ def update_history(source_id, target_id, conn, log_last=None): cursor.execute( query, (target_id, source_id, log_last if log_last is not None else 0) ) - conn.commit() except Exception as e: - conn.rollback() print(f"Creating log failed: {e}") log_last = None else: @@ -523,120 +516,137 @@ def db_insert(data_arr, username=None, skiplog=False): raise ValueError( f"Author needs to be scummvm for seeding. 
Incorrect author: {author}" ) - except ValueError as ve: - raise ve except KeyError as e: print(f"Missing key in header: {e}") return - src = author - detection = True - status = "detection" - - conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),)) - - with conn.cursor() as cursor: - cursor.execute("SELECT MAX(`transaction`) FROM transactions") - temp = cursor.fetchone()["MAX(`transaction`)"] - if temp is None: - temp = 0 - transaction_id = temp + 1 + try: + src = author + detection = True + status = "detection" - category_text = f"Uploaded from {src}" - log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}" + with conn.cursor() as cursor: + cursor.execute("SET @fileset_time_last = %s", (int(time.time()),)) - user = f"cli:{getpass.getuser()}" if username is None else username - create_log(escape_string(category_text), user, escape_string(log_text), conn) + with conn.cursor() as cursor: + cursor.execute("SELECT MAX(`transaction`) FROM transactions") + temp = cursor.fetchone()["MAX(`transaction`)"] + if temp is None: + temp = 0 + transaction_id = temp + 1 - console_log(log_text) - console_log_total_filesets(filepath) + category_text = f"Uploaded from {src}" + log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}" - fileset_count = 1 - for fileset in game_data: - console_log_detection(fileset_count) - key = calc_key(fileset) - megakey = calc_megakey(fileset) + user = f"cli:{getpass.getuser()}" if username is None else username + create_log(escape_string(category_text), user, escape_string(log_text), conn) - try: - engine_name = fileset.get("engine", "") - engineid = fileset["sourcefile"] - gameid = fileset["name"] - title = fileset.get("title", "") - extra = fileset.get("extra", "") - platform = fileset.get("platform", "") - lang = fileset.get("language", "") - except KeyError as e: - print( - f"Missing key in header: {e} for {fileset.get('name', '')}-{fileset.get('language', '')}-{fileset.get('platform', '')}" - ) - return + console_log(log_text) + console_log_total_filesets(filepath) - with conn.cursor() as cursor: - query = """ - SELECT id - FROM fileset - WHERE `key` = %s - """ - cursor.execute(query, (key,)) - existing_entry = cursor.fetchone() - if existing_entry is not None: - log_text = f"Skipping Entry as similar entry already exsits - Fileset:{existing_entry['id']}. 
Skpped entry details - engineid = {engineid}, gameid = {gameid}, platform = {platform}, language = {lang}" - create_log("Warning", user, escape_string(log_text), conn) - console_log(log_text) - continue + fileset_count = 1 + for fileset in game_data: + console_log_detection(fileset_count) + key = calc_key(fileset) + megakey = calc_megakey(fileset) + + try: + engine_name = fileset.get("engine", "") + engineid = fileset["sourcefile"] + gameid = fileset["name"] + title = fileset.get("title", "") + extra = fileset.get("extra", "") + platform = fileset.get("platform", "") + lang = fileset.get("language", "") + except KeyError as e: + raise RuntimeError( + f"Missing key in header: {e} for {fileset.get('name', '')}-{fileset.get('language', '')}-{fileset.get('platform', '')}" + ) - insert_game(engine_name, engineid, title, gameid, extra, platform, lang, conn) + with conn.cursor() as cursor: + query = """ + SELECT id + FROM fileset + WHERE `key` = %s + """ + cursor.execute(query, (key,)) + existing_entry = cursor.fetchone() + if existing_entry is not None: + log_text = f"Skipping Entry as similar entry already exsits - Fileset:{existing_entry['id']}. Skpped entry details - engineid = {engineid}, gameid = {gameid}, platform = {platform}, language = {lang}" + create_log("Warning", user, escape_string(log_text), conn) + console_log(log_text) + continue - log_text = f"size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}." + insert_game( + engine_name, engineid, title, gameid, extra, platform, lang, conn + ) - if insert_fileset( - src, - detection, - key, - megakey, - transaction_id, - log_text, - conn, - username=username, - skiplog=skiplog, - ): - # Some detection entries contain duplicate files. - unique_files = [] - seen = set() - for file_dict in fileset["rom"]: - dict_tuple = tuple(sorted(file_dict.items())) - if dict_tuple not in seen: - seen.add(dict_tuple) - unique_files.append(file_dict) - - for file in unique_files: - insert_file(file, detection, src, conn) - file_id = None - with conn.cursor() as cursor: - cursor.execute("SELECT @file_last AS file_id") - file_id = cursor.fetchone()["file_id"] - for key, value in file.items(): - if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]: - insert_filechecksum(file, key, file_id, conn) + log_text = f"size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}." - fileset_count += 1 + if insert_fileset( + src, + detection, + key, + megakey, + transaction_id, + log_text, + conn, + username=username, + skiplog=skiplog, + ): + # Some detection entries contain duplicate files. 
+ unique_files = [] + seen = set() + for file_dict in fileset["rom"]: + dict_tuple = tuple(sorted(file_dict.items())) + if dict_tuple not in seen: + seen.add(dict_tuple) + unique_files.append(file_dict) + + for file in unique_files: + insert_file(file, detection, src, conn) + file_id = None + with conn.cursor() as cursor: + cursor.execute("SELECT @file_last AS file_id") + file_id = cursor.fetchone()["file_id"] + for key, value in file.items(): + if key not in [ + "name", + "size", + "size-r", + "size-rd", + "sha1", + "crc", + ]: + insert_filechecksum(file, key, file_id, conn) + + fileset_count += 1 + + cur = conn.cursor() - cur = conn.cursor() + try: + cur.execute( + "SELECT COUNT(fileset) from transactions WHERE `transaction` = %s", + (transaction_id,), + ) + fileset_insertion_count = cur.fetchone()["COUNT(fileset)"] + category_text = f"Uploaded from {src}" + log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}" + console_log(log_text) + except Exception as e: + print("Inserting failed:", e) + else: + user = f"cli:{getpass.getuser()}" if username is None else username + create_log( + escape_string(category_text), user, escape_string(log_text), conn + ) - try: - cur.execute( - "SELECT COUNT(fileset) from transactions WHERE `transaction` = %s", - (transaction_id,), - ) - fileset_insertion_count = cur.fetchone()["COUNT(fileset)"] - category_text = f"Uploaded from {src}" - log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}" - console_log(log_text) + conn.commit() except Exception as e: - print("Inserting failed:", e) - else: - user = f"cli:{getpass.getuser()}" if username is None else username - create_log(escape_string(category_text), user, escape_string(log_text), conn) + conn.rollback() + print(f"Transaction failed: {e}") + finally: + conn.close() def compare_filesets(id1, id2, conn): @@ -895,59 +905,27 @@ def match_fileset(data_arr, username=None, skiplog=False): print(f"Missing key in header: {e}") return - src = "dat" if author not in ["scan", "scummvm"] else author - detection = False - source_status = src - - conn.cursor().execute("SET @fileset_time_last = %s", (int(time.time()),)) + try: + src = "dat" if author not in ["scan", "scummvm"] else author + detection = False + source_status = src - with conn.cursor() as cursor: - cursor.execute("SELECT MAX(`transaction`) FROM transactions") - transaction_id = cursor.fetchone()["MAX(`transaction`)"] - transaction_id = transaction_id + 1 if transaction_id else 1 + with conn.cursor() as cursor: + cursor.execute("SET @fileset_time_last = %s", (int(time.time()),)) + cursor.execute("SELECT MAX(`transaction`) FROM transactions") + transaction_id = cursor.fetchone()["MAX(`transaction`)"] + transaction_id = transaction_id + 1 if transaction_id else 1 - category_text = f"Uploaded from {src}" - log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. 
Transaction: {transaction_id}" - console_log(log_text) - console_log_total_filesets(filepath) - user = f"cli:{getpass.getuser()}" if username is None else username - create_log(escape_string(category_text), user, escape_string(log_text), conn) + category_text = f"Uploaded from {src}" + log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. Transaction: {transaction_id}" + console_log(log_text) + console_log_total_filesets(filepath) + user = f"cli:{getpass.getuser()}" if username is None else username + create_log(escape_string(category_text), user, escape_string(log_text), conn) - if src == "dat": - set_process( - game_data, - resources, - detection, - src, - conn, - transaction_id, - filepath, - author, - version, - source_status, - user, - skiplog, - ) - elif src == "scan": - scan_process( - game_data, - resources, - detection, - src, - conn, - transaction_id, - filepath, - author, - version, - source_status, - user, - skiplog, - ) - else: - game_data_lookup = {fs["name"]: fs for fs in game_data} - for fileset in game_data: - process_fileset( - fileset, + if src == "dat": + set_process( + game_data, resources, detection, src, @@ -958,11 +936,56 @@ def match_fileset(data_arr, username=None, skiplog=False): version, source_status, user, - game_data_lookup, + skiplog, ) - finalize_fileset_insertion( - conn, transaction_id, src, filepath, author, version, source_status, user - ) + elif src == "scan": + scan_process( + game_data, + resources, + detection, + src, + conn, + transaction_id, + filepath, + author, + version, + source_status, + user, + skiplog, + ) + else: + game_data_lookup = {fs["name"]: fs for fs in game_data} + for fileset in game_data: + process_fileset( + fileset, + resources, + detection, + src, + conn, + transaction_id, + filepath, + author, + version, + source_status, + user, + game_data_lookup, + ) + finalize_fileset_insertion( + conn, + transaction_id, + src, + filepath, + author, + version, + source_status, + user, + ) + conn.commit() + except Exception as e: + conn.rollback() + print(f"Transaction failed: {e}") + finally: + conn.close() def scan_process( @@ -2639,7 +2662,6 @@ def delete_original_fileset(fileset_id, conn): with conn.cursor() as cursor: cursor.execute("DELETE FROM file WHERE fileset = %s", (fileset_id,)) cursor.execute("DELETE FROM fileset WHERE id = %s", (fileset_id,)) - conn.commit() def update_fileset_status(cursor, fileset_id, status): From 686d06e3970f9d00ad50751b57f1a8378458cd74 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Sat, 26 Jul 2025 13:36:45 +0530 Subject: [PATCH 14/47] INTEGRITY: Remove early string escaping for database logs as queries have been parametrised --- db_functions.py | 96 +++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 63 deletions(-) diff --git a/db_functions.py b/db_functions.py index b2cbc558..272935b6 100644 --- a/db_functions.py +++ b/db_functions.py @@ -161,9 +161,7 @@ def insert_fileset( log_text = f"Updated Fileset:{existing_entry}, {log_text}" user = f"cli:{getpass.getuser()}" if username is None else username if not skiplog: - log_last = create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + log_last = create_log(category_text, user, log_text, conn) update_history(existing_entry, existing_entry, conn, log_last) return (existing_entry, True) @@ -187,9 +185,7 @@ def insert_fileset( user = f"cli:{getpass.getuser()}" if username is None else username if not skiplog 
and detection: - log_last = create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + log_last = create_log(category_text, user, log_text, conn) update_history(fileset_last, fileset_last, conn, log_last) else: update_history(0, fileset_last, conn) @@ -539,7 +535,7 @@ def db_insert(data_arr, username=None, skiplog=False): log_text = f"Started loading DAT file {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {status}. Transaction: {transaction_id}" user = f"cli:{getpass.getuser()}" if username is None else username - create_log(escape_string(category_text), user, escape_string(log_text), conn) + create_log(category_text, user, log_text, conn) console_log(log_text) console_log_total_filesets(filepath) @@ -573,7 +569,7 @@ def db_insert(data_arr, username=None, skiplog=False): existing_entry = cursor.fetchone() if existing_entry is not None: log_text = f"Skipping Entry as similar entry already exsits - Fileset:{existing_entry['id']}. Skpped entry details - engineid = {engineid}, gameid = {gameid}, platform = {platform}, language = {lang}" - create_log("Warning", user, escape_string(log_text), conn) + create_log("Warning", user, log_text, conn) console_log(log_text) continue @@ -637,9 +633,7 @@ def db_insert(data_arr, username=None, skiplog=False): print("Inserting failed:", e) else: user = f"cli:{getpass.getuser()}" if username is None else username - create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + create_log(category_text, user, log_text, conn) conn.commit() except Exception as e: @@ -858,16 +852,12 @@ def populate_matching_games(): create_log( "Fileset merge", user, - escape_string( - f"Merged Fileset:{matched_game['fileset']} and Fileset:{fileset[0][0]}" - ), + f"Merged Fileset:{matched_game['fileset']} and Fileset:{fileset[0][0]}", conn, ) # Matching log - log_last = create_log( - escape_string(conn, category_text), user, escape_string(conn, log_text) - ) + log_last = create_log(conn, category_text, user, conn, log_text) # Add log id to the history table cursor.execute( @@ -921,7 +911,7 @@ def match_fileset(data_arr, username=None, skiplog=False): console_log(log_text) console_log_total_filesets(filepath) user = f"cli:{getpass.getuser()}" if username is None else username - create_log(escape_string(category_text), user, escape_string(log_text), conn) + create_log(category_text, user, log_text, conn) if src == "dat": set_process( @@ -1064,9 +1054,7 @@ def scan_process( fileset["description"] if "description" in fileset else "" ) log_text = f"Drop fileset as no matching candidates. Name: {fileset_name} Description: {fileset_description}." - create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + create_log(category_text, user, log_text, conn) dropped_early_no_candidate += 1 delete_original_fileset(fileset_id, conn) continue @@ -1105,11 +1093,11 @@ def scan_process( fileset_insertion_count = cursor.fetchone()["COUNT(fileset)"] category_text = f"Uploaded from {src}" log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}. State {source_status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}" - create_log(escape_string(category_text), user, escape_string(log_text), conn) + create_log(category_text, user, log_text, conn) category_text = "Upload information" log_text = f"Number of filesets: {fileset_insertion_count}. Filesets automatically merged: {automatic_merged_filesets}. 
Filesets requiring manual merge (multiple candidates): {manual_merged_filesets}. Filesets requiring manual merge (matched with detection): {manual_merged_with_detection}. Filesets dropped, no candidate: {dropped_early_no_candidate}. Filesets matched with existing Full fileset: {match_with_full_fileset}. Filesets with mismatched files with Full fileset: {mismatch_with_full_fileset}. Filesets missing files compared to partial fileset candidate: {filesets_with_missing_files}." console_log(log_text) - create_log(escape_string(category_text), user, escape_string(log_text), conn) + create_log(category_text, user, log_text, conn) def pre_update_files(rom, filesets_check_for_full, transaction_id, conn): @@ -1215,9 +1203,9 @@ def scan_perform_match( log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" category_text = "Uploaded from scan." create_log( - escape_string(category_text), + category_text, user, - escape_string(log_text), + log_text, conn, ) console_log(log_text) @@ -1273,9 +1261,9 @@ def scan_perform_match( log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" category_text = "Uploaded from scan." create_log( - escape_string(category_text), + category_text, user, - escape_string(log_text), + log_text, conn, ) console_log(log_text) @@ -1321,9 +1309,7 @@ def scan_perform_match( elif len(candidate_filesets) > 1: log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" category_text = "Uploaded from scan." - create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + create_log(category_text, user, log_text, conn) console_log(log_text) category_text = "Manual Merge - Multiple Candidates" log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidate_filesets)}." @@ -1827,9 +1813,7 @@ def set_process( log_text = f"Drop fileset as no matching candidates. Name: {fileset_name} Description: {fileset_description}." console_log_text = f"Early fileset drop as no matching candidates. Name: {fileset_name} Description: {fileset_description}." no_candidate_logs.append(console_log_text) - create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + create_log(category_text, user, log_text, conn) dropped_early_no_candidate += 1 delete_original_fileset(fileset_id, conn) continue @@ -1887,9 +1871,7 @@ def set_process( ) log_text = f"Drop fileset, multiple filesets mapping to single detection. Name: {fileset_name} Description: {fileset_description}. Clashed with Fileset:{candidate} ({engine}:{gameid}-{platform}-{language})" console_log(log_text) - create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + create_log(category_text, user, log_text, conn) dropped_early_single_candidate_multiple_sets += 1 delete_original_fileset(set_fileset, conn) del set_to_candidate_dict[set_fileset] @@ -1950,18 +1932,14 @@ def set_process( log_text = f"Drop fileset as no matching candidates. Name: {fileset_name} Description: {fileset_description}." console_log_text = f"Fileset dropped as no candidates anymore. Name: {fileset_name} Description: {fileset_description}." 
console_log(console_log_text) - create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + create_log(category_text, user, log_text, conn) dropped_early_no_candidate += 1 manual_merged_filesets -= 1 delete_original_fileset(fileset_id, conn) else: log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" category_text = "Uploaded from dat." - create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + create_log(category_text, user, log_text, conn) console_log(log_text) category_text = "Manual Merge Required" log_text = f"Merge Fileset:{fileset_id} manually. Possible matches are: {', '.join(f'Fileset:{id}' for id in candidates)}." @@ -1982,11 +1960,11 @@ def set_process( fileset_insertion_count = cursor.fetchone()["COUNT(fileset)"] category_text = f"Uploaded from {src}" log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}. State {source_status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}" - create_log(escape_string(category_text), user, escape_string(log_text), conn) + create_log(category_text, user, log_text, conn) category_text = "Upload information" log_text = f"Number of filesets: {fileset_insertion_count}. Filesets automatically merged: {auto_merged_filesets}. Filesets dropped early (no candidate) - {dropped_early_no_candidate}. Filesets dropped early (mapping to single detection) - {dropped_early_single_candidate_multiple_sets}. Filesets requiring manual merge: {manual_merged_filesets}. Partial/Full filesets already present: {fully_matched_filesets}. Partial/Full filesets with mismatch {mismatch_filesets}." console_log(log_text) - create_log(escape_string(category_text), user, escape_string(log_text), conn) + create_log(category_text, user, log_text, conn) def set_filter_by_platform(gameid, candidate_filesets, conn): @@ -2056,9 +2034,7 @@ def set_perform_match( log_text = f"Drop fileset as no matching candidates. Name: {fileset_name} Description: {fileset_description}." console_log_text = f"Fileset dropped as no candidates anymore. Name: {fileset_name} Description: {fileset_description}." no_candidate_logs.append(console_log_text) - create_log( - escape_string(category_text), user, escape_string(log_text), conn - ) + create_log(category_text, user, log_text, conn) dropped_early_no_candidate += 1 delete_original_fileset(fileset_id, conn) elif len(candidate_filesets) == 1: @@ -2098,9 +2074,9 @@ def set_perform_match( category_text = "Already present" log_text = f"Already present as - Fileset:{matched_fileset_id}. Deleting Fileset:{fileset_id}" log_last = create_log( - escape_string(category_text), + category_text, user, - escape_string(log_text), + log_text, conn, ) update_history(fileset_id, matched_fileset_id, conn, log_last) @@ -2111,9 +2087,9 @@ def set_perform_match( log_text = f"Created Fileset:{fileset_id}. Name: {fileset_name} Description: {fileset_description}" category_text = "Uploaded from dat." 
                create_log(
-                    escape_string(category_text),
+                    category_text,
                     user,
-                    escape_string(log_text),
+                    log_text,
                     conn,
                 )
                 console_log(log_text)
@@ -2226,7 +2202,7 @@ def add_manual_merge(
     """
     cursor.execute(query, (child_fileset, parent_fileset))
 
-    create_log(escape_string(category_text), user, escape_string(log_text), conn)
+    create_log(category_text, user, log_text, conn)
 
     if print_text:
         print(print_text)
@@ -2968,9 +2944,7 @@ def log_matched_fileset(src, fileset_last, fileset_id, state, user, conn):
     log_text = (
         f"Matched Fileset:{fileset_last} with Fileset:{fileset_id}. State {state}."
     )
-    log_last = create_log(
-        escape_string(category_text), user, escape_string(log_text), conn
-    )
+    log_last = create_log(category_text, user, log_text, conn)
     update_history(fileset_last, fileset_id, conn, log_last)
 
 
@@ -2992,7 +2966,7 @@ def log_scan_match_with_full(
         f"Fileset matched completely with Full Fileset:{candidate_id}. Dropping."
     )
     print(log_text)
-    create_log(escape_string(category_text), user, escape_string(log_text), conn)
+    create_log(category_text, user, log_text, conn)
 
 
 def finalize_fileset_insertion(
@@ -3007,9 +2981,7 @@ def finalize_fileset_insertion(
         category_text = f"Uploaded from {src}"
         if src != "user":
             log_text = f"Completed loading DAT file, filename {filepath}, size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}. Number of filesets: {fileset_insertion_count}. Transaction: {transaction_id}"
-            create_log(
-                escape_string(category_text), user, escape_string(log_text), conn
-            )
+            create_log(category_text, user, log_text, conn)
 
 
 def user_integrity_check(data, ip, game_metadata=None):
@@ -3051,9 +3023,7 @@ def user_integrity_check(data, ip, game_metadata=None):
 
         user = f"cli:{getpass.getuser()}"
 
-        create_log(
-            escape_string(category_text), user, escape_string(log_text), conn
-        )
+        create_log(category_text, user, log_text, conn)
 
         matched_map = find_matching_filesets(data, conn, src)
 
@@ -3186,7 +3156,7 @@ def user_integrity_check(data, ip, game_metadata=None):
     finally:
         category_text = f"Uploaded from {src}"
        log_text = f"Completed loading file, State {source_status}. Transaction: {transaction_id}"
-        create_log(escape_string(category_text), user, escape_string(log_text), conn)
+        create_log(category_text, user, log_text, conn)
         # conn.close()
     return matched_map, missing_map, extra_map

From 5e4627b48bae406db54007465fab0f92f6665eb8 Mon Sep 17 00:00:00 2001
From: ShivangNagta
Date: Sat, 26 Jul 2025 13:48:57 +0530
Subject: [PATCH 15/47] INTEGRITY: Remove deprecated/redundant code.

--- db_functions.py | 617 +------------------------------------- fileset.py | 48 ++- user_fileset_functions.py | 205 ------------- 3 files changed, 23 insertions(+), 847 deletions(-) delete mode 100644 user_fileset_functions.py diff --git a/db_functions.py b/db_functions.py index 272935b6..9b385ebb 100644 --- a/db_functions.py +++ b/db_functions.py @@ -1,18 +1,14 @@ import pymysql import json -from collections import Counter import getpass import time import hashlib import os -from pymysql.converters import escape_string from collections import defaultdict import re import copy import sys -SPECIAL_SYMBOLS = '/":*|\\?%<>\x7f' - def db_connect(): console_log("Connecting to the Database.") @@ -324,59 +320,6 @@ def delete_filesets(conn): cursor.execute(query) -def my_escape_string(s: str) -> str: - """ - Escape strings - - Escape the following: - - escape char: \x81 - - unallowed filename chars: https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words - - control chars < 0x20 - """ - new_name = "" - for char in s: - if char == "\x81": - new_name += "\x81\x79" - elif char in SPECIAL_SYMBOLS or ord(char) < 0x20: - new_name += "\x81" + chr(0x80 + ord(char)) - else: - new_name += char - return new_name - - -def encode_punycode(orig): - """ - Punyencode strings - - - escape special characters and - - ensure filenames can't end in a space or dotif temp == None: - """ - s = my_escape_string(orig) - encoded = s.encode("punycode").decode("ascii") - # punyencoding adds an '-' at the end when there are no special chars - # don't use it for comparing - compare = encoded - if encoded.endswith("-"): - compare = encoded[:-1] - if orig != compare or compare[-1] in " .": - return "xn--" + encoded - return orig - - -def punycode_need_encode(orig): - """ - A filename needs to be punyencoded when it: - - - contains a char that should be escaped or - - ends with a dot or a space. 
- """ - if not all((0x20 <= ord(c) < 0x80) and c not in SPECIAL_SYMBOLS for c in orig): - return True - if orig[-1] in " .": - return True - return False - - def create_log(category, user, text, conn): with conn.cursor() as cursor: try: @@ -643,233 +586,6 @@ def db_insert(data_arr, username=None, skiplog=False): conn.close() -def compare_filesets(id1, id2, conn): - with conn.cursor() as cursor: - cursor.execute( - "SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = %s", - (id1,), - ) - fileset1 = cursor.fetchall() - cursor.execute( - "SELECT name, size, `size-r`, `size-rd`, checksum FROM file WHERE fileset = %s", - (id2,), - ) - fileset2 = cursor.fetchall() - - # Sort filesets on checksum - fileset1.sort(key=lambda x: x[2]) - fileset2.sort(key=lambda x: x[2]) - - if len(fileset1) != len(fileset2): - return False - - for i in range(len(fileset1)): - # If checksums do not match - if fileset1[i][2] != fileset2[i][2]: - return False - - return True - - -def status_to_match(status): - order = ["detection", "dat", "scan", "partialmatch", "fullmatch", "user"] - return order[: order.index(status)] - - -def find_matching_game(game_files): - matching_games = [] # All matching games - matching_filesets = [] # All filesets containing one file from game_files - matches_count = 0 # Number of files with a matching detection entry - - conn = db_connect() - - for file in game_files: - checksum = file[1] - - query = "SELECT file.fileset as file_fileset FROM filechecksum JOIN file ON filechecksum.file = file.id WHERE filechecksum.checksum = %s AND file.detection = TRUE" - with conn.cursor() as cursor: - cursor.execute(query, (checksum,)) - records = cursor.fetchall() - - # If file is not part of detection entries, skip it - if len(records) == 0: - continue - - matches_count += 1 - for record in records: - matching_filesets.append(record[0]) - - # Check if there is a fileset_id that is present in all results - for key, value in Counter(matching_filesets).items(): - with conn.cursor() as cursor: - cursor.execute( - "SELECT COUNT(file.id) FROM file JOIN fileset ON file.fileset = fileset.id WHERE fileset.id = %s", - (key,), - ) - count_files_in_fileset = cursor.fetchone()["COUNT(file.id)"] - - # We use < instead of != since one file may have more than one entry in the fileset - # We see this in Drascula English version, where one entry is duplicated - if value < matches_count or value < count_files_in_fileset: - continue - - with conn.cursor() as cursor: - cursor.execute( - "SELECT engineid, game.id, gameid, platform, language, `key`, src, fileset.id as fileset FROM game JOIN fileset ON fileset.game = game.id JOIN engine ON engine.id = game.engine WHERE fileset.id = %s", - (key,), - ) - records = cursor.fetchall() - - matching_games.append(records[0]) - - if len(matching_games) != 1: - return matching_games - - # Check the current fileset priority with that of the match - with conn.cursor() as cursor: - cursor.execute( - f"SELECT id FROM fileset, ({query}) AS res WHERE id = file_fileset AND status IN ({', '.join(['%s'] * len(game_files[3]))})", - status_to_match(game_files[3]), - ) - records = cursor.fetchall() - - # If priority order is correct - if len(records) != 0: - return matching_games - - if compare_filesets(matching_games[0]["fileset"], game_files[0][0], conn): - with conn.cursor() as cursor: - cursor.execute( - "UPDATE fileset SET `delete` = TRUE WHERE id = %s", (game_files[0][0],) - ) - return [] - - return matching_games - - -def merge_filesets(detection_id, dat_id): - conn = 
db_connect() - - try: - with conn.cursor() as cursor: - cursor.execute( - "SELECT DISTINCT(filechecksum.checksum), checksize, checktype FROM filechecksum JOIN file on file.id = filechecksum.file WHERE fileset = %s'", - (detection_id,), - ) - detection_files = cursor.fetchall() - - for file in detection_files: - checksum = file[0] - checksize = file[1] - checktype = file[2] - - cursor.execute( - "DELETE FROM file WHERE checksum = %s AND fileset = %s LIMIT 1", - (checksum, detection_id), - ) - cursor.execute( - "UPDATE file JOIN filechecksum ON filechecksum.file = file.id SET detection = TRUE, checksize = %s, checktype = %s WHERE fileset = %s AND filechecksum.checksum = %s", - (checksize, checktype, dat_id, checksum), - ) - - cursor.execute( - "INSERT INTO history (`timestamp`, fileset, oldfileset) VALUES (FROM_UNIXTIME(%s), %s, %s)", - (int(time.time()), dat_id, detection_id), - ) - cursor.execute("SELECT LAST_INSERT_ID()") - history_last = cursor.fetchone()["LAST_INSERT_ID()"] - - cursor.execute( - "UPDATE history SET fileset = %s WHERE fileset = %s", - (dat_id, detection_id), - ) - cursor.execute("DELETE FROM fileset WHERE id = %s", (detection_id,)) - - conn.commit() - except Exception as e: - conn.rollback() - print(f"Error merging filesets: {e}") - finally: - # conn.close() - pass - - return history_last - - -def populate_matching_games(): - conn = db_connect() - - # Getting unmatched filesets - unmatched_filesets = [] - - with conn.cursor() as cursor: - cursor.execute( - "SELECT fileset.id, filechecksum.checksum, src, status FROM fileset JOIN file ON file.fileset = fileset.id JOIN filechecksum ON file.id = filechecksum.file WHERE fileset.game IS NULL AND status != 'user'" - ) - unmatched_files = cursor.fetchall() - - # Splitting them into different filesets - i = 0 - while i < len(unmatched_files): - cur_fileset = unmatched_files[i][0] - temp = [] - while i < len(unmatched_files) and cur_fileset == unmatched_files[i][0]: - temp.append(unmatched_files[i]) - i += 1 - unmatched_filesets.append(temp) - - for fileset in unmatched_filesets: - matching_games = find_matching_game(fileset) - - if len(matching_games) != 1: # If there is no match/non-unique match - continue - - matched_game = matching_games[0] - - # Update status depending on $matched_game["src"] (dat -> partialmatch, scan -> fullmatch) - status = fileset[0][2] - if fileset[0][2] == "dat": - status = "partialmatch" - elif fileset[0][2] == "scan": - status = "fullmatch" - - # Convert NULL values to string with value NULL for printing - matched_game = {k: "NULL" if v is None else v for k, v in matched_game.items()} - - category_text = f"Matched from {fileset[0][2]}" - log_text = f"Matched game {matched_game['engineid']}:\n{matched_game['gameid']}-{matched_game['platform']}-{matched_game['language']}\nvariant {matched_game['key']}. State {status}. Fileset:{fileset[0][0]}." 
- - # Updating the fileset.game value to be $matched_game["id"] - query = "UPDATE fileset SET game = %s, status = %s, `key` = %s WHERE id = %s" - - history_last = merge_filesets(matched_game["fileset"], fileset[0][0]) - - if cursor.execute( - query, (matched_game["id"], status, matched_game["key"], fileset[0][0]) - ): - user = f"cli:{getpass.getuser()}" - - create_log( - "Fileset merge", - user, - f"Merged Fileset:{matched_game['fileset']} and Fileset:{fileset[0][0]}", - conn, - ) - - # Matching log - log_last = create_log(conn, category_text, user, conn, log_text) - - # Add log id to the history table - cursor.execute( - "UPDATE history SET log = %s WHERE id = %s", (log_last, history_last) - ) - - try: - conn.commit() - except Exception: - print("Updating matched games failed") - - def match_fileset(data_arr, username=None, skiplog=False): """ data_arr -> tuple : (header, game_data, resources, filepath). @@ -943,33 +659,6 @@ def match_fileset(data_arr, username=None, skiplog=False): user, skiplog, ) - else: - game_data_lookup = {fs["name"]: fs for fs in game_data} - for fileset in game_data: - process_fileset( - fileset, - resources, - detection, - src, - conn, - transaction_id, - filepath, - author, - version, - source_status, - user, - game_data_lookup, - ) - finalize_fileset_insertion( - conn, - transaction_id, - src, - filepath, - author, - version, - source_status, - user, - ) conn.commit() except Exception as e: conn.rollback() @@ -2429,78 +2118,6 @@ def is_candidate_by_checksize(candidate, fileset, conn): return False -def process_fileset( - fileset, - resources, - detection, - src, - conn, - transaction_id, - filepath, - author, - version, - source_status, - user, - game_data_lookup, -): - if detection: - insert_game_data(fileset, conn) - - # Ideally romof should be enough, but adding in case of an edge case - current_name = fileset.get("romof") or fileset.get("cloneof") - - # Iteratively check for extra files if linked to multiple filesets - while current_name: - if current_name in resources: - fileset["rom"] += resources[current_name]["rom"] - break - - elif current_name in game_data_lookup: - linked = game_data_lookup[current_name] - fileset["rom"] += linked.get("rom", []) - current_name = linked.get("romof") or linked.get("cloneof") - else: - break - - key = calc_key(fileset) if not detection else "" - megakey = calc_megakey(fileset) if detection else "" - log_text = f"size {os.path.getsize(filepath)}, author {author}, version {version}. State {source_status}." 
- if src != "dat": - matched_map = find_matching_filesets(fileset, conn, src) - else: - matched_map = matching_set(fileset, conn) - - (fileset_id, _) = insert_new_fileset( - fileset, conn, detection, src, key, megakey, transaction_id, log_text, user - ) - - if matched_map: - handle_matched_filesets( - fileset_id, - matched_map, - fileset, - conn, - detection, - src, - key, - megakey, - transaction_id, - log_text, - user, - ) - - -def insert_game_data(fileset, conn): - engine_name = fileset["engine"] - engineid = fileset["sourcefile"] - gameid = fileset["name"] - title = fileset["title"] - extra = fileset["extra"] - platform = fileset["platform"] - lang = fileset["language"] - insert_game(engine_name, engineid, title, gameid, extra, platform, lang, conn) - - def find_matching_filesets(fileset, conn, status): matched_map = defaultdict(list) if status != "user": @@ -2535,105 +2152,6 @@ def find_matching_filesets(fileset, conn, status): return matched_map -def matching_set(fileset, conn): - matched_map = defaultdict(list) - with conn.cursor() as cursor: - for file in fileset["rom"]: - matched_set = set() - if "md5" in file: - checksum = file["md5"] - if ":" in checksum: - checksum = checksum.split(":")[1] - size = file["size"] - - query = """ - SELECT DISTINCT fs.id AS fileset_id - FROM fileset fs - JOIN file f ON fs.id = f.fileset - JOIN filechecksum fc ON f.id = fc.file - WHERE fc.checksum = %s AND fc.checktype LIKE 'md5%' - AND fc.checksize > %s - AND fs.status = 'detection' - """ - cursor.execute(query, (checksum, size)) - records = cursor.fetchall() - if records: - for record in records: - matched_set.add(record["fileset_id"]) - for id in matched_set: - matched_map[id].append(file) - return matched_map - - -def handle_matched_filesets( - fileset_last, - matched_map, - fileset, - conn, - detection, - src, - key, - megakey, - transaction_id, - log_text, - user, -): - matched_list = sorted(matched_map.items(), key=lambda x: len(x[1]), reverse=True) - is_full_matched = False - with conn.cursor() as cursor: - for matched_fileset_id, matched_count in matched_list: - if is_full_matched: - break - cursor.execute( - "SELECT status FROM fileset WHERE id = %s", (matched_fileset_id,) - ) - status = cursor.fetchone()["status"] - cursor.execute( - "SELECT COUNT(file.id) FROM file WHERE fileset = %s", - (matched_fileset_id,), - ) - count = cursor.fetchone()["COUNT(file.id)"] - - if status in ["detection", "obsolete"] and count == len(matched_count): - is_full_matched = True - update_fileset_status( - cursor, matched_fileset_id, "full" if src != "dat" else "partial" - ) - populate_file(fileset, matched_fileset_id, conn, detection) - log_matched_fileset( - src, - fileset_last, - matched_fileset_id, - "full" if src != "dat" else "partial", - user, - conn, - ) - delete_original_fileset(fileset_last, conn) - elif status == "full" and len(fileset["rom"]) == count: - is_full_matched = True - log_matched_fileset( - src, fileset_last, matched_fileset_id, "full", user, conn - ) - delete_original_fileset(fileset_last, conn) - return - elif (status == "partial") and count == len(matched_count): - is_full_matched = True - update_fileset_status(cursor, matched_fileset_id, "full") - populate_file(fileset, matched_fileset_id, conn, detection) - log_matched_fileset( - src, fileset_last, matched_fileset_id, "full", user, conn - ) - delete_original_fileset(fileset_last, conn) - elif status == "scan" and count == len(matched_count): - log_matched_fileset( - src, fileset_last, matched_fileset_id, "full", user, conn - ) - 
elif src == "dat": - log_matched_fileset( - src, fileset_last, matched_fileset_id, "partial matched", user, conn - ) - - def delete_original_fileset(fileset_id, conn): with conn.cursor() as cursor: cursor.execute("DELETE FROM file WHERE fileset = %s", (fileset_id,)) @@ -2652,131 +2170,6 @@ def update_fileset_status(cursor, fileset_id, status): ) -def populate_file(fileset, fileset_id, conn, detection): - with conn.cursor() as cursor: - cursor.execute("SELECT * FROM file WHERE fileset = %s", (fileset_id,)) - target_files = cursor.fetchall() - target_files_dict = {} - for target_file in target_files: - cursor.execute( - "SELECT * FROM filechecksum WHERE file = %s", (target_file["id"],) - ) - target_checksums = cursor.fetchall() - for checksum in target_checksums: - target_files_dict[checksum["checksum"]] = target_file - target_files_dict[target_file["id"]] = ( - f"{checksum['checktype']}-{checksum['checksize']}" - ) - for file in fileset["rom"]: - file_exists = False - checksum = "" - checksize = 5000 - checktype = "None" - if "md5" in file: - checksum = file["md5"] - else: - for key, value in file.items(): - if "md5" in key: - checksize, checktype, checksum = get_checksum_props(key, value) - break - - if not detection: - checktype = "None" - detection = 0 - detection_type = ( - f"{checktype}-{checksize}" if checktype != "None" else f"{checktype}" - ) - - extended_file_size = True if "size-r" in file else False - - name = normalised_path(file["name"]) - escaped_name = escape_string(name) - - columns = ["name", "size"] - values = [f"'{escaped_name}'", f"'{file['size']}'"] - - if extended_file_size: - columns.extend(["`size-r`", "`size-rd`"]) - values.extend([f"'{file['size-r']}'", f"'{file['size-rd']}'"]) - - columns.extend( - ["checksum", "fileset", "detection", "detection_type", "`timestamp`"] - ) - values.extend( - [ - f"'{checksum}'", - str(fileset_id), - str(detection), - f"'{detection_type}'", - "NOW()", - ] - ) - - query = ( - f"INSERT INTO file ({', '.join(columns)}) VALUES ({', '.join(values)})" - ) - cursor.execute(query) - cursor.execute("SET @file_last = LAST_INSERT_ID()") - cursor.execute("SELECT @file_last AS file_id") - - file_id = cursor.fetchone()["file_id"] - d_type = 0 - previous_checksums = {} - - for key, value in file.items(): - if key not in ["name", "size", "size-r", "size-rd", "sha1", "crc"]: - insert_filechecksum(file, key, file_id, conn) - if value in target_files_dict and not file_exists: - cursor.execute( - f"SELECT detection_type FROM file WHERE id = {target_files_dict[value]['id']}" - ) - d_type = cursor.fetchone()["detection_type"] - file_exists = True - cursor.execute( - f"SELECT * FROM file WHERE fileset = {fileset_id}" - ) - target_files = cursor.fetchall() - for target_file in target_files: - cursor.execute( - f"SELECT * FROM filechecksum WHERE file = {target_file['id']}" - ) - target_checksums = cursor.fetchall() - for checksum in target_checksums: - previous_checksums[ - f"{checksum['checktype']}-{checksum['checksize']}" - ] = checksum["checksum"] - cursor.execute( - f"DELETE FROM file WHERE id = {target_files_dict[value]['id']}" - ) - - if file_exists: - cursor.execute( - f"SELECT checktype, checksize FROM filechecksum WHERE file = {file_id}" - ) - existing_checks = cursor.fetchall() - existing_checksum = [] - for existing_check in existing_checks: - existing_checksum.append( - existing_check["checktype"] + "-" + existing_check["checksize"] - ) - for key, value in previous_checksums.items(): - if key not in existing_checksum: - checksize, checktype, 
checksum = get_checksum_props(key, value) - cursor.execute( - "INSERT INTO filechecksum (file, checksize, checktype, checksum) VALUES (%s, %s, %s, %s)", - (file_id, checksize, checktype, checksum), - ) - - cursor.execute(f"UPDATE file SET detection = 1 WHERE id = {file_id}") - cursor.execute( - f"UPDATE file SET detection_type = '{d_type}' WHERE id = {file_id}" - ) - else: - cursor.execute( - f"UPDATE file SET detection_type = 'None' WHERE id = {file_id}" - ) - - def set_populate_file(fileset, fileset_id, conn, detection): """ Updates the old fileset in case of a match. Further deletes the newly created fileset which is not needed anymore. @@ -3133,11 +2526,11 @@ def user_integrity_check(data, ip, game_metadata=None): log_matched_fileset( src, matched_fileset_id, matched_fileset_id, "full", user, conn ) - elif status == "partial" and count == matched_count: - populate_file(data, matched_fileset_id, conn, None, src) - log_matched_fileset( - src, matched_fileset_id, matched_fileset_id, "partial", user, conn - ) + # elif status == "partial" and count == matched_count: + # populate_file(data, matched_fileset_id, conn, None, src) + # log_matched_fileset( + # src, matched_fileset_id, matched_fileset_id, "partial", user, conn + # ) elif status == "user" and count == matched_count: add_usercount(matched_fileset_id, conn) log_matched_fileset( diff --git a/fileset.py b/fileset.py index 7ee5dd85..4377d102 100644 --- a/fileset.py +++ b/fileset.py @@ -10,10 +10,6 @@ import json import html as html_lib import os -from user_fileset_functions import ( - user_insert_fileset, - match_and_merge_user_filesets, -) from pagination import create_page import difflib from db_functions import ( @@ -344,10 +340,6 @@ def fileset(): connection.commit() html += "

Fileset marked for deletion

" - if "match" in request.form: - match_and_merge_user_filesets(request.form["match"]) - return redirect(url_for("fileset", id=request.form["match"])) - # Generate the HTML for the fileset history cursor.execute( "SELECT `timestamp`, category, `text`, id FROM log WHERE `text` REGEXP 'Fileset:%s' ORDER BY `timestamp` DESC, id DESC", @@ -1134,28 +1126,24 @@ def validate(): json_response = {"error": error_codes["success"], "files": []} - if not game_metadata: - if not json_object.get("files"): - json_response["error"] = error_codes["empty"] - del json_response["files"] - json_response["status"] = "empty_fileset" - return jsonify(json_response) - - json_response["error"] = error_codes["no_metadata"] - del json_response["files"] - json_response["status"] = "no_metadata" - - conn = db_connect() - try: - fileset_id = user_insert_fileset(json_object, ip, conn) - finally: - conn.close() - json_response["fileset"] = fileset_id - return jsonify(json_response) - - matched_map = {} - missing_map = {} - extra_map = {} + # if not game_metadata: + # if not json_object.get("files"): + # json_response["error"] = error_codes["empty"] + # del json_response["files"] + # json_response["status"] = "empty_fileset" + # return jsonify(json_response) + + # json_response["error"] = error_codes["no_metadata"] + # del json_response["files"] + # json_response["status"] = "no_metadata" + + # conn = db_connect() + # try: + # fileset_id = user_insert_fileset(json_object, ip, conn) + # finally: + # conn.close() + # json_response["fileset"] = fileset_id + # return jsonify(json_response) file_object = json_object["files"] if not file_object: diff --git a/user_fileset_functions.py b/user_fileset_functions.py deleted file mode 100644 index 6ca1c1f0..00000000 --- a/user_fileset_functions.py +++ /dev/null @@ -1,205 +0,0 @@ -import hashlib -import time -from db_functions import ( - db_connect, - insert_fileset, - insert_file, - insert_filechecksum, - find_matching_game, - merge_filesets, - create_log, - calc_megakey, -) -import getpass -import pymysql - - -def user_calc_key(user_fileset): - key_string = "" - for file in user_fileset: - for key, value in file.items(): - if key != "checksums": - key_string += ":" + str(value) - continue - for checksum_pair in value: - key_string += ":" + checksum_pair["checksum"] - key_string = key_string.strip(":") - return hashlib.md5(key_string.encode()).hexdigest() - - -def file_json_to_array(file_json_object): - res = {} - for key, value in file_json_object.items(): - if key != "checksums": - res[key] = value - continue - for checksum_pair in value: - res[checksum_pair["type"]] = checksum_pair["checksum"] - return res - - -def user_insert_queue(user_fileset, conn): - query = "INSERT INTO queue (time, notes, fileset, ticketid, userid, commit) VALUES (%s, NULL, @fileset_last, NULL, NULL, NULL)" - - with conn.cursor() as cursor: - cursor.execute(query, (int(time.time()),)) - conn.commit() - - -def user_insert_fileset(user_fileset, ip, conn): - src = "user" - detection = False - key = "" - megakey = calc_megakey(user_fileset) - with conn.cursor() as cursor: - cursor.execute("SELECT MAX(`transaction`) FROM transactions") - transaction_id = cursor.fetchone()["MAX(`transaction`)"] + 1 - log_text = "from user submitted files" - cursor.execute("SET @fileset_time_last = %s", (int(time.time()),)) - if insert_fileset( - src, detection, key, megakey, transaction_id, log_text, conn, ip - ): - for file in user_fileset["files"]: - file = file_json_to_array(file) - insert_file(file, detection, src, conn) - for 
key, value in file.items(): - if key not in ["name", "size"]: - insert_filechecksum(file, key, conn) - cursor.execute("SELECT @fileset_last") - fileset_id = cursor.fetchone()["@fileset_last"] - conn.commit() - return fileset_id - - -def match_and_merge_user_filesets(id): - conn = db_connect() - - # Getting unmatched filesets - unmatched_filesets = [] - - with conn.cursor() as cursor: - cursor.execute( - "SELECT fileset.id, filechecksum.checksum, src, status FROM fileset JOIN file ON file.fileset = fileset.id JOIN filechecksum ON file.id = filechecksum.file WHERE status = 'user' AND fileset.id = %s", - (id,), - ) - unmatched_files = cursor.fetchall() - - # Splitting them into different filesets - i = 0 - while i < len(unmatched_files): - cur_fileset = unmatched_files[i][0] - temp = [] - while i < len(unmatched_files) and cur_fileset == unmatched_files[i][0]: - temp.append(unmatched_files[i]) - i += 1 - unmatched_filesets.append(temp) - - for fileset in unmatched_filesets: - matching_games = find_matching_game(fileset) - - if len(matching_games) != 1: # If there is no match/non-unique match - continue - - matched_game = matching_games[0] - - status = "full" - - # Convert NULL values to string with value NULL for printing - matched_game = {k: "NULL" if v is None else v for k, v in matched_game.items()} - - category_text = f"Matched from {fileset[0][2]}" - log_text = f"Matched game {matched_game['engineid']}:\n{matched_game['gameid']}-{matched_game['platform']}-{matched_game['language']}\nvariant {matched_game['key']}. State {status}. Fileset:{fileset[0][0]}." - - # Updating the fileset.game value to be $matched_game["id"] - query = "UPDATE fileset SET game = %s, status = %s, `key` = %s WHERE id = %s" - - history_last = merge_filesets(matched_game["fileset"], fileset[0][0]) - - if cursor.execute( - query, (matched_game["id"], status, matched_game["key"], fileset[0][0]) - ): - user = f"cli:{getpass.getuser()}" - - # Merge log - create_log( - "Fileset merge", - user, - pymysql.escape_string( - conn, - f"Merged Fileset:{matched_game['fileset']} and Fileset:{fileset[0][0]}", - ), - ) - - # Matching log - log_last = create_log( - pymysql.escape_string(conn, category_text), - user, - pymysql.escape_string(conn, log_text), - ) - - # Add log id to the history table - cursor.execute( - "UPDATE history SET log = %s WHERE id = %s", (log_last, history_last) - ) - - if not conn.commit(): - print("Updating matched games failed") - with conn.cursor() as cursor: - cursor.execute( - """ - SELECT fileset.id, filechecksum.checksum, src, status - FROM fileset - JOIN file ON file.fileset = fileset.id - JOIN filechecksum ON file.id = filechecksum.file - WHERE status = 'user' AND fileset.id = %s - """, - (id,), - ) - unmatched_files = cursor.fetchall() - - unmatched_filesets = [] - cur_fileset = None - temp = [] - for file in unmatched_files: - if cur_fileset is None or cur_fileset != file["id"]: - if temp: - unmatched_filesets.append(temp) - cur_fileset = file["id"] - temp = [] - temp.append(file) - if temp: - unmatched_filesets.append(temp) - - for fileset in unmatched_filesets: - matching_games = find_matching_game(fileset) - if len(matching_games) != 1: - continue - matched_game = matching_games[0] - status = "full" - matched_game = { - k: ("NULL" if v is None else v) for k, v in matched_game.items() - } - category_text = f"Matched from {fileset[0]['src']}" - log_text = f"Matched game {matched_game['engineid']}: {matched_game['gameid']}-{matched_game['platform']}-{matched_game['language']} variant 
{matched_game['key']}. State {status}. Fileset:{fileset[0]['id']}." - query = """ - UPDATE fileset - SET game = %s, status = %s, `key` = %s - WHERE id = %s - """ - history_last = merge_filesets(matched_game["fileset"], fileset[0]["id"]) - with conn.cursor() as cursor: - cursor.execute( - query, - (matched_game["id"], status, matched_game["key"], fileset[0]["id"]), - ) - user = "cli:" + getpass.getuser() - create_log( - "Fileset merge", - user, - f"Merged Fileset:{matched_game['fileset']} and Fileset:{fileset[0]['id']}", - ) - log_last = create_log(category_text, user, log_text) - cursor.execute( - "UPDATE history SET log = %s WHERE id = %s", (log_last, history_last) - ) - conn.commit() From 4222dde49e5924e74fff20afac46c1acad47b20a Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Sun, 27 Jul 2025 01:53:39 +0530 Subject: [PATCH 16/47] INTEGRITY: Improve homepage navbar. --- fileset.py | 118 +++++++++++++++++++++++++------------------- index.html | 2 - pagination.py | 16 ++++-- static/style.css | 76 +++++++++++++++++++++++----- templates/home.html | 105 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 249 insertions(+), 68 deletions(-) delete mode 100644 index.html create mode 100644 templates/home.html diff --git a/fileset.py b/fileset.py index 4377d102..cdcbe0d4 100644 --- a/fileset.py +++ b/fileset.py @@ -5,6 +5,7 @@ url_for, render_template_string, jsonify, + render_template, ) import pymysql.cursors import json @@ -32,37 +33,12 @@ @app.route("/") def index(): - html = """ - - - - - - - -

Fileset Database

-

Fileset Actions

- -

Logs

- -
- -
- - - """ - return render_template_string(html) + return redirect(url_for("logs")) + + +@app.route("/home") +def home(): + return render_template("home.html") @app.route("/clear_database", methods=["POST"]) @@ -148,10 +124,18 @@ def fileset(): - @@ -505,7 +505,7 @@ def merge_fileset(id): User Games List Ready for review Fileset Search - Logs + Logs Config @@ -553,7 +553,7 @@ def merge_fileset(id): User Games List Ready for review Fileset Search - Logs + Logs Config @@ -621,7 +621,7 @@ def possible_merge_filesets(id): User Games List Ready for review Fileset Search - Logs + Logs Config @@ -824,7 +824,7 @@ def highlight_differences(source, target): User Games List Ready for review Fileset Search - Logs + Logs Config diff --git a/pagination.py b/pagination.py index 22f7930c..339fc819 100644 --- a/pagination.py +++ b/pagination.py @@ -22,6 +22,30 @@ def get_join_columns(table1, table2, mapping): return "No primary-foreign key mapping provided. Filter is invalid" +def build_search_condition(value, column): + phrases = re.findall(r'"([^"]+)"', value) + if phrases: + conditions = [f"{column} REGEXP '{re.escape(p)}'" for p in phrases] + return " AND ".join(conditions) + + if "+" in value: + and_terms = value.split("+") + and_conditions = [] + for term in and_terms: + or_terms = term.strip().split() + if len(or_terms) > 1: + or_cond = " OR ".join( + [f"{column} REGEXP '{re.escape(t)}'" for t in or_terms if t] + ) + and_conditions.append(f"({or_cond})") + else: + and_conditions.append(f"{column} REGEXP '{re.escape(term.strip())}'") + return " AND ".join(and_conditions) + else: + or_terms = value.split() + return " OR ".join([f"{column} REGEXP '{re.escape(t)}'" for t in or_terms if t]) + + def create_page( filename, results_per_page, @@ -46,62 +70,47 @@ def create_page( ) with conn.cursor() as cursor: - # Handle sorting - sort = request.args.get("sort") - if sort: - column = sort.split("-") - order = f"ORDER BY {column[0]}" - if "desc" in sort: - order += " DESC" + tables = set() + where_clauses = [] - if set(request.args.keys()).difference({"page", "sort"}): - condition = "WHERE " - tables = set() - for key, value in request.args.items(): - if key in ["page", "sort"] or value == "": - continue - tables.add(filters[key]) - if value == "": - value = ".*" - condition += ( - f" AND {filters[key]}.{'id' if key == 'fileset' else key} REGEXP '{value}'" - if condition != "WHERE " - else f"{filters[key]}.{'id' if key == 'fileset' else key} REGEXP '{value}'" - ) + for key, value in request.args.items(): + if key in ("page", "sort") or value == "": + continue + tables.add(filters[key]) + col = f"{filters[key]}.{'id' if key == 'fileset' else key}" + parsed = build_search_condition(value, col) + if parsed: + where_clauses.append(parsed) - if condition == "WHERE ": - condition = "" + condition = "" + if where_clauses: + condition = "WHERE " + " AND ".join(where_clauses) - # Handle multiple tables - from_query = records_table - join_order = ["game", "engine"] - tables_list = sorted( - list(tables), - key=lambda t: join_order.index(t) if t in join_order else 99, - ) - if records_table not in tables_list or len(tables_list) > 1: - for table in tables_list: - if table == records_table: - continue - if table == "engine": - if "game" in tables: - from_query += " JOIN engine ON engine.id = game.engine" - else: - from_query += " JOIN game ON game.id = fileset.game JOIN engine ON engine.id = game.engine" + from_query = records_table + join_order = ["game", "engine"] + tables_list = sorted( + list(tables), key=lambda t: 
join_order.index(t) if t in join_order else 99 + ) + + if records_table not in tables_list or len(tables_list) > 1: + for t in tables_list: + if t == records_table: + continue + if t == "engine": + if "game" in tables: + from_query += " JOIN engine ON engine.id = game.engine" else: - from_query += f" JOIN {table} ON {get_join_columns(records_table, table, mapping)}" - cursor.execute( - f"SELECT COUNT({records_table}.id) AS count FROM {from_query} {condition}" - ) - num_of_results = cursor.fetchone()["count"] + from_query += " JOIN game ON game.id = fileset.game JOIN engine ON engine.id = game.engine" + else: + from_query += ( + f" JOIN {t} ON {get_join_columns(records_table, t, mapping)}" + ) - elif "JOIN" in records_table: - first_table = records_table.split(" ")[0] - cursor.execute(f"SELECT COUNT({first_table}.id) FROM {records_table}") - num_of_results = cursor.fetchone()[f"COUNT({first_table}.id)"] - else: - cursor.execute(f"SELECT COUNT(id) FROM {records_table}") - num_of_results = cursor.fetchone()["COUNT(id)"] + base_table = records_table.split(" ")[0] + cursor.execute( + f"SELECT COUNT({base_table}.id) AS count FROM {from_query} {condition}" + ) + num_of_results = cursor.fetchone()["count"] num_of_pages = (num_of_results + results_per_page - 1) // results_per_page print(f"Num of results: {num_of_results}, Num of pages: {num_of_pages}") @@ -110,29 +119,21 @@ def create_page( page = max(1, min(page, num_of_pages)) offset = (page - 1) * results_per_page - # Fetch results - if set(request.args.keys()).difference({"page"}): - condition = "WHERE " - for key, value in request.args.items(): - if key not in filters: - continue - - value = pymysql.converters.escape_string(value) - if value == "": - value = ".*" - field = f"{filters[key]}.{'id' if key == 'fileset' else key}" - if value == ".*": - clause = f"({field} IS NULL OR {field} REGEXP '{value}')" - else: - clause = f"{field} REGEXP '{value}'" - condition += f" AND {clause}" if condition != "WHERE " else clause - - if condition == "WHERE ": - condition = "" - - query = f"{select_query} {condition} {order} LIMIT {results_per_page} OFFSET {offset}" + # Sort + order = "" + sort_param = request.args.get("sort") + if sort_param: + sort_parts = sort_param.split("-") + sort_col = sort_parts[0] + order = f"ORDER BY {sort_col}" + if "desc" in sort_param: + order += " DESC" else: - query = f"{select_query} {order} LIMIT {results_per_page} OFFSET {offset}" + if records_table == "log": + order = "ORDER BY `id` DESC" + + # Fetch results + query = f"{select_query} {condition} {order} LIMIT {results_per_page} OFFSET {offset}" cursor.execute(query) results = cursor.fetchall() @@ -154,7 +155,7 @@ def create_page( User Games List Ready for review Fileset Search - Logs + Logs Config diff --git a/templates/config.html b/templates/config.html index a578779d..d730f153 100644 --- a/templates/config.html +++ b/templates/config.html @@ -117,7 +117,7 @@ User Games List Ready for review Fileset Search - Logs + Logs Config diff --git a/templates/home.html b/templates/home.html index c41cda05..ec093df7 100644 --- a/templates/home.html +++ b/templates/home.html @@ -89,7 +89,7 @@ User Games List Ready for review Fileset Search - Logs + Logs Config
From 45cbd16ea347fc54bac2d8ed18ea6a6bc4caa9a8 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Thu, 31 Jul 2025 13:23:33 +0530 Subject: [PATCH 26/47] INTEGRITY: Add separate logs per page and filesets per page in config. --- fileset.py | 84 +++++++++++++++---------------------------- templates/config.html | 81 +++++++++++++++++++++++++---------------- 2 files changed, 80 insertions(+), 85 deletions(-) diff --git a/fileset.py b/fileset.py index 4eae1dd0..3ba6f995 100644 --- a/fileset.py +++ b/fileset.py @@ -1383,26 +1383,41 @@ def config(): Stores the user configurations in the cookies """ if request.method == "POST": - items_per_page = request.form.get("items_per_page", "25") + filesets_per_page = request.form.get("filesets_per_page", "25") + logs_per_page = request.form.get("logs_per_page", "25") try: - items_per_page_int = int(items_per_page) - if items_per_page_int < 1: - items_per_page = "1" + filesets_per_page_int = int(filesets_per_page) + logs_per_page_int = int(logs_per_page) + if filesets_per_page_int < 1: + filesets_per_page = "1" + if logs_per_page_int < 1: + logs_per_page_int = "1" except ValueError: - items_per_page = "25" + filesets_per_page = "25" + logs_per_page = "25" resp = make_response(redirect(url_for("config"))) - resp.set_cookie("items_per_page", items_per_page, max_age=365 * 24 * 60 * 60) + resp.set_cookie( + "filesets_per_page", filesets_per_page, max_age=365 * 24 * 60 * 60 + ) + resp.set_cookie("logs_per_page", logs_per_page, max_age=365 * 24 * 60 * 60) return resp - items_per_page = int(request.cookies.get("items_per_page", "25")) + filesets_per_page = int(request.cookies.get("filesets_per_page", "25")) + logs_per_page = int(request.cookies.get("logs_per_page", "25")) + + return render_template( + "config.html", filesets_per_page=filesets_per_page, logs_per_page=logs_per_page + ) + - return render_template("config.html", items_per_page=items_per_page) +def get_filesets_per_page(): + return int(request.cookies.get("filesets_per_page", "25")) -def get_items_per_page(): - return int(request.cookies.get("items_per_page", "25")) +def get_logs_per_page(): + return int(request.cookies.get("logs_per_page", "25")) @app.route("/validate", methods=["POST"]) @@ -1497,47 +1512,6 @@ def ready_for_review(): return redirect(url) -@app.route("/games_list") -def games_list(): - filename = "games_list" - records_table = "game" - select_query = """ - SELECT engineid, gameid, extra, platform, language, game.name, - status, fileset.id as fileset - FROM game - JOIN engine ON engine.id = game.engine - JOIN fileset ON game.id = fileset.game - """ - order = "ORDER BY gameid" - filters = { - "engineid": "engine", - "gameid": "game", - "extra": "game", - "platform": "game", - "language": "game", - "name": "game", - "status": "fileset", - } - mapping = { - "engine.id": "game.engine", - "game.id": "fileset.game", - } - - items_per_page = get_items_per_page() - - return render_template_string( - create_page( - filename, - items_per_page, - records_table, - select_query, - order, - filters, - mapping, - ) - ) - - @app.route("/logs") def logs(): filename = "logs" @@ -1551,10 +1525,10 @@ def logs(): "user": "log", "text": "log", } - items_per_page = get_items_per_page() + logs_per_page = get_logs_per_page() return render_template_string( create_page( - filename, items_per_page, records_table, select_query, order, filters + filename, logs_per_page, records_table, select_query, order, filters ) ) @@ -1588,11 +1562,11 @@ def fileset_search(): "engine.id": "game.engine", "fileset.id": 
"transactions.fileset", } - items_per_page = get_items_per_page() + filesets_per_page = get_filesets_per_page() return render_template_string( create_page( filename, - items_per_page, + filesets_per_page, records_table, select_query, order, diff --git a/templates/config.html b/templates/config.html index d730f153..a524ac3c 100644 --- a/templates/config.html +++ b/templates/config.html @@ -23,7 +23,6 @@ background-color: #ffffff; color: #000000; padding: 10px; - font-size: 50px; align-self: flex-start; margin-left: 2vh; } @@ -35,51 +34,58 @@ .config-section { margin-bottom: 30px; - padding: 20px; + padding: 25px; + padding-left: 50px; + padding-right: 50px; border: 1px solid #ddd; border-radius: 8px; background-color: #f9f9f9; - max-width: 600px; - margin-left: auto; - margin-right: auto; + width: 100%; + box-sizing: border-box; } .config-item { display: flex; - align-items: center; - margin-bottom: 15px; + flex-direction: column; + margin-bottom: 20px; + gap: 10px; } .config-item label { - flex: 1; - margin-right: 15px; font-weight: 500; + font-size: 14px; + color: #333; } .current-value { font-style: italic; color: #666; font-size: 12px; + margin-top: 4px; + } + + .input-container { + display: flex; + align-items: center; + gap: 15px; + margin-bottom: 20px; + } + + .submit-section { + padding: 20px 0; + border-top: 1px solid #ddd; + margin-top: 20px; } .config-item input, .config-item select { - padding: 8px 12px; + padding: 10px 12px; border: 1px solid #ccc; - border-radius: 4px; + border-radius: 6px; font-size: 14px; - min-width: 120px; - margin-right: 2vw; - } - - .success-message { - background-color: #d4edda; - color: #155724; - padding: 10px; - border: 1px solid #c3e6cb; - border-radius: 4px; - margin-bottom: 20px; - text-align: center; + width: 200px; + box-sizing: border-box; + transition: border-color 0.3s ease; } @media (max-width: 768px) { @@ -124,24 +130,39 @@

User Configurations
[templates/config.html form diff, markup stripped during extraction — recoverable content: the single items-per-page field ("Current: {{ items_per_page }}") is replaced by two inputs, filesets per page and logs per page, each annotated "Default: 25".]
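One detail of the config() changes above worth noting: the "< 1" guard for logs reassigns logs_per_page_int rather than logs_per_page, so an out-of-range log value can still be written to the cookie. A minimal sketch of the clamping the patch appears to intend (hypothetical helper, not part of the committed diff):

# Hypothetical helper showing the clamping that config() appears to intend
# before the per-page values are written back to cookies.
def parse_per_page(raw, default="25"):
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return default
    return str(max(value, 1))

# parse_per_page("0")  -> "1"   (never fewer than one result per page)
# parse_per_page("x")  -> "25"  (fall back to the default)
# parse_per_page("50") -> "50"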
From 6b44b812e3506696f5c7298ee8f6fcd54760b497 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Thu, 31 Jul 2025 14:04:14 +0530 Subject: [PATCH 27/47] INTEGRITY: Add/update deployment related files. --- apache2-config/gamesdb.sev.zone.conf | 11 +++++---- app.wsgi | 12 ++++++++++ fileset.py | 2 +- requirements.txt | 36 ++++++++++++++-------------- 4 files changed, 38 insertions(+), 23 deletions(-) create mode 100644 app.wsgi diff --git a/apache2-config/gamesdb.sev.zone.conf b/apache2-config/gamesdb.sev.zone.conf index 8b37f5be..43563729 100644 --- a/apache2-config/gamesdb.sev.zone.conf +++ b/apache2-config/gamesdb.sev.zone.conf @@ -4,12 +4,15 @@ ServerAdmin webmaster@localhost CustomLog ${APACHE_LOG_DIR}/integrity-access.log combined ErrorLog ${APACHE_LOG_DIR}/integrity-error.log - DocumentRoot /home/ubuntu/projects/python/scummvm-sites + DocumentRoot /home/ubuntu/projects/python/scummvm_sites_2025/scummvm-sites WSGIDaemonProcess scummvm-sites user=www-data group=www-data threads=5 - WSGIScriptAlias / /home/ubuntu/projects/python/scummvm-sites/app.wsgi + WSGIScriptAlias / /home/ubuntu/projects/python/scummvm_sites_2025/scummvm-sites/app.wsgi - - Require all granted + + AuthType Basic + AuthName "nope" + AuthUserFile /home/ubuntu/projects/python/scummvm_sites_2025/.htpasswd + Require valid-user diff --git a/app.wsgi b/app.wsgi new file mode 100644 index 00000000..a52d3aba --- /dev/null +++ b/app.wsgi @@ -0,0 +1,12 @@ +import sys +import logging + +sys.path.insert(0, "/home/ubuntu/projects/python/scummvm_sites_2025/scummvm-sites") + +from fileset import app as application + +logging.basicConfig(stream=sys.stderr) +sys.stderr = sys.stdout + +if __name__ == "__main__": + application.run() diff --git a/fileset.py b/fileset.py index 3ba6f995..42d07d31 100644 --- a/fileset.py +++ b/fileset.py @@ -1597,4 +1597,4 @@ def delete_files(id): if __name__ == "__main__": app.secret_key = secret_key - app.run(port=5001, debug=True, host="0.0.0.0") + app.run(debug=True, host="0.0.0.0") diff --git a/requirements.txt b/requirements.txt index 8486da70..8340e269 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,18 @@ -blinker==1.9.0 -cffi==1.17.1 -click==8.1.8 -cryptography==45.0.3 -Flask==3.1.0 -iniconfig==2.1.0 -itsdangerous==2.2.0 -Jinja2==3.1.5 -MarkupSafe==3.0.2 -packaging==25.0 -pluggy==1.6.0 -pycparser==2.22 -Pygments==2.19.1 -PyMySQL==1.1.1 -pytest==8.4.0 -setuptools==75.8.0 -Werkzeug==3.1.3 -wheel==0.45.1 +blinker +cffi +click +cryptography +Flask +iniconfig +itsdangerous +Jinja2 +MarkupSafe +packaging +pluggy +pycparser +Pygments +PyMySQL +pytest +setuptools +Werkzeug +wheel From 7aabe392e9e982633b76061d3753e733959f6906 Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Thu, 31 Jul 2025 14:29:05 +0530 Subject: [PATCH 28/47] INTEGRITY: Add underline on hover for navbar links. --- static/style.css | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/static/style.css b/static/style.css index 527824b7..b93da503 100644 --- a/static/style.css +++ b/static/style.css @@ -67,9 +67,9 @@ nav { margin-left: 10px; } -/* .nav-buttons a:hover { - box-shadow: 0 4px 12px rgba(39, 145, 232, 0.4); -} */ +.nav-buttons a:hover { + text-decoration: underline; +} .logo img { height: 75px; From 9ac4cf7b14776c1991971619ab6f3c3a5a3931ed Mon Sep 17 00:00:00 2001 From: ShivangNagta Date: Thu, 31 Jul 2025 14:47:19 +0530 Subject: [PATCH 29/47] INTEGRITY: Add favicon. 
--- fileset.py | 10 ++++++++++ pagination.py | 2 ++ templates/config.html | 2 ++ templates/home.html | 2 ++ 4 files changed, 16 insertions(+) diff --git a/fileset.py b/fileset.py index 42d07d31..db0c89c7 100644 --- a/fileset.py +++ b/fileset.py @@ -129,6 +129,8 @@ def fileset(): + +