
Commit 303c593

feat(scripts): add databend-meta log collection tool (#18539)
Add a Python script that collects logs from databend-meta based on its config file. The script supports both the log_dir and [log.file].dir configuration formats, with validation to prevent conflicts between the two. It also installs its dependencies automatically and reports detailed progress for troubleshooting.
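
For reference, a minimal sketch of the two accepted log-directory forms in a databend-meta config file (the paths below are illustrative, not taken from this commit):

# Form 1: top-level key
log_dir = "./.databend/logs"

# Form 2: nested key; if both forms are set, their values must match
[log.file]
dir = "./.databend/logs"

If both keys are present with different values, the script raises an error instead of guessing which directory the node actually used.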
1 parent 91b5011 commit 303c593

File tree

2 files changed (+247 lines, -1 line)
Lines changed: 244 additions & 0 deletions
@@ -0,0 +1,244 @@
#!/usr/bin/env python3

import sys
import os
import tarfile
import argparse
import subprocess
from datetime import datetime
from pathlib import Path


def ensure_tomli_available():
    """Ensure tomli is available, install if needed."""
    print("=== Checking dependencies ===")
    print(f"Python executable: {sys.executable}")
    print(f"Python version: {sys.version}")

    try:
        import tomli
        print("✓ tomli already available")
        return tomli
    except ImportError:
        print("✗ tomli not found, installing...")
        try:
            print(f"Installing tomli using: {sys.executable} -m pip install tomli")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'tomli'],
                                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            print("✓ tomli installed successfully")
            import tomli
            print("✓ tomli imported successfully")
            return tomli
        except subprocess.CalledProcessError as e:
            print(f"✗ Failed to install tomli: {e}")
            print("Please install manually: pip install tomli")
            sys.exit(1)
        except ImportError:
            print("✗ Failed to import tomli after installation")
            sys.exit(1)


# Install tomli at startup
print("Databend Meta Log Collector")
print("============================")
tomli = ensure_tomli_available()
print()


def parse_config(config_file):
    """Parse TOML config file and extract log directory."""
    try:
        with open(config_file, 'rb') as f:
            config = tomli.load(f)

        # Check for log directory in two possible locations:
        # 1. Top-level: log_dir = "..."
        # 2. In [log.file] section: dir = "..."

        top_level_log_dir = config.get('log_dir')

        log_config = config.get('log', {})
        file_config = log_config.get('file', {})
        nested_log_dir = file_config.get('dir')

        print(f"Top-level log_dir: {top_level_log_dir}")
        print(f"[log.file].dir: {nested_log_dir}")

        # Validate configuration
        if top_level_log_dir and nested_log_dir:
            if top_level_log_dir != nested_log_dir:
                raise ValueError(
                    f"Conflicting log directory settings found:\n"
                    f"  log_dir = '{top_level_log_dir}'\n"
                    f"  [log.file].dir = '{nested_log_dir}'\n"
                    f"Please use only one log directory configuration."
                )
            print("✓ Both log directory settings present and match")
            return top_level_log_dir
        elif top_level_log_dir:
            print("✓ Using top-level log_dir setting")
            return top_level_log_dir
        elif nested_log_dir:
            print("✓ Using [log.file].dir setting")
            return nested_log_dir
        else:
            raise ValueError(
                "No log directory found in config file. "
                "Please set either 'log_dir' or '[log.file].dir'"
            )

    except FileNotFoundError:
        raise FileNotFoundError(f"Config file '{config_file}' not found")
    except Exception as e:
        raise ValueError(f"Error parsing config file: {e}")


def resolve_log_dir(log_dir, config_file):
    """Resolve log directory path (handle relative paths)."""
    log_path = Path(log_dir)

    if not log_path.is_absolute():
        # Resolve relative to config file directory
        config_dir = Path(config_file).parent
        log_path = config_dir / log_path

    return log_path.resolve()


def analyze_log_directory(log_dir):
    """Analyze log directory and return detailed information."""
    print(f"=== Analyzing log directory ===")
    print(f"Log directory path: {log_dir}")
    print(f"Directory exists: {log_dir.exists()}")

    if not log_dir.exists():
        print("✗ Log directory does not exist")
        return None

    if not log_dir.is_dir():
        print("✗ Path is not a directory")
        return None

    print(f"Directory readable: {os.access(log_dir, os.R_OK)}")

    # Get directory contents
    try:
        all_items = list(log_dir.iterdir())
        files = [f for f in all_items if f.is_file()]
        dirs = [d for d in all_items if d.is_dir()]

        print(f"Total items: {len(all_items)}")
        print(f"Files: {len(files)}")
        print(f"Subdirectories: {len(dirs)}")

        if files:
            print("Files found:")
            for f in files[:10]:  # Show first 10 files
                size = f.stat().st_size
                mtime = datetime.fromtimestamp(f.stat().st_mtime).strftime('%Y-%m-%d %H:%M:%S')
                print(f"  - {f.name} ({size} bytes, modified: {mtime})")
            if len(files) > 10:
                print(f"  ... and {len(files) - 10} more files")

        if dirs:
            print("Subdirectories found:")
            for d in dirs[:5]:  # Show first 5 directories
                print(f"  - {d.name}/")
            if len(dirs) > 5:
                print(f"  ... and {len(dirs) - 5} more directories")

        return {'files': files, 'dirs': dirs, 'total': len(all_items)}

    except PermissionError:
        print("✗ Permission denied accessing directory")
        return None
    except Exception as e:
        print(f"✗ Error accessing directory: {e}")
        return None


def create_log_archive(log_dir, output_file):
    """Create tar.gz archive of all files in log directory."""
    analysis = analyze_log_directory(log_dir)
    if analysis is None:
        raise FileNotFoundError(f"Cannot access log directory '{log_dir}'")

    print(f"\n=== Creating archive ===")
    print(f"Output file: {output_file}")

    files = analysis['files']
    dirs = analysis['dirs']

    with tarfile.open(output_file, 'w:gz') as tar:
        if files or dirs:
            for file_path in files:
                print(f"Adding file: {file_path.name}")
                tar.add(file_path, arcname=file_path.name)
            for dir_path in dirs:
                print(f"Adding directory: {dir_path.name}/")
                tar.add(dir_path, arcname=dir_path.name)
        else:
            print("No files to archive")

    return analysis['total']


def main():
    parser = argparse.ArgumentParser(
        description='Collect databend-meta logs based on config file',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='Example: %(prog)s databend-meta-node-1.toml'
    )
    parser.add_argument('config_file', help='Path to databend-meta config file (.toml)')
    parser.add_argument('-o', '--output', help='Output archive filename (default: auto-generated)')

    args = parser.parse_args()

    try:
        # Parse config and extract log directory
        print(f"=== Processing config file ===")
        config_abs_path = Path(args.config_file).resolve()
        print(f"Config file absolute path: {config_abs_path}")
        print(f"Config file exists: {config_abs_path.exists()}")
        print(f"Config file readable: {os.access(config_abs_path, os.R_OK)}")

        log_dir_str = parse_config(config_abs_path)
        print(f"Found log directory setting: '{log_dir_str}'")

        # Resolve log directory path
        log_dir = resolve_log_dir(log_dir_str, config_abs_path)
        print(f"Resolved log directory: {log_dir}")

        # Generate output filename if not provided
        print(f"\n=== Preparing output ===")
        if args.output:
            output_file = args.output
            print(f"Using provided output filename: {output_file}")
        else:
            config_name = Path(args.config_file).stem
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = f"{config_name}_logs_{timestamp}.tar.gz"
            print(f"Generated output filename: {output_file}")

        output_abs_path = Path(output_file).resolve()
        print(f"Output absolute path: {output_abs_path}")
        print(f"Output directory writable: {os.access(output_abs_path.parent, os.W_OK)}")

        # Create log archive
        file_count = create_log_archive(log_dir, output_file)

        print(f"\n=== Archive completed ===")
        if file_count > 0:
            file_size = os.path.getsize(output_file)
            size_mb = file_size / (1024 * 1024)
            print(f"✓ Successfully created log archive: {output_file}")
            print(f"✓ Items archived: {file_count}")
            print(f"✓ Archive size: {size_mb:.2f} MB")
        else:
            print(f"⚠ Warning: Log directory '{log_dir}' is empty")
            print(f"✓ Created empty archive: {output_file}")

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
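
A typical invocation, following the script's own argparse epilog (collect_meta_logs.py is a placeholder name here, since the script's path in the repository is not shown on this page):

# Archive the log directory referenced by a node's config file;
# the archive name defaults to <config-stem>_logs_<timestamp>.tar.gz
python3 collect_meta_logs.py databend-meta-node-1.toml

# Or name the archive explicitly
python3 collect_meta_logs.py databend-meta-node-1.toml -o node-1-logs.tar.gz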

src/meta/service/src/store/store_inner.rs

Lines changed: 3 additions & 1 deletion
@@ -309,7 +309,9 @@ impl RaftStoreInner {
             snapshot_stat :% = db.stat(); "do_build_snapshot complete");

         {
-            let mut sm = self.get_state_machine_write("do_build_snapshot").await;
+            let mut sm = self
+                .get_state_machine_write("do_build_snapshot-replace-compacted")
+                .await;
             sm.levels_mut()
                 .replace_with_compacted(compactor, db.clone());
         }
