#!/usr/bin/env python3

import sys
import os
import tarfile
import argparse
import subprocess
from datetime import datetime
from pathlib import Path


def ensure_tomli_available():
    """Ensure tomli is available, install if needed."""
    print("=== Checking dependencies ===")
    print(f"Python executable: {sys.executable}")
    print(f"Python version: {sys.version}")

    try:
        import tomli
        print("✓ tomli already available")
        return tomli
    except ImportError:
        print("✗ tomli not found, installing...")
        try:
            print(f"Installing tomli using: {sys.executable} -m pip install tomli")
            # Discard pip's output; DEVNULL (unlike PIPE) cannot fill up and block
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'tomli'],
                                  stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            print("✓ tomli installed successfully")
            import tomli
            print("✓ tomli imported successfully")
            return tomli
        except subprocess.CalledProcessError as e:
            print(f"✗ Failed to install tomli: {e}")
            print("Please install manually: pip install tomli")
            sys.exit(1)
        except ImportError:
            print("✗ Failed to import tomli after installation")
            sys.exit(1)
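
# Note: Python 3.11+ ships tomllib in the standard library, which is
# API-compatible with tomli for the read-only use here. A minimal sketch of
# preferring it, assuming you want to skip the pip install on newer Pythons:
#
#     try:
#         import tomllib as tomli  # stdlib, Python 3.11+
#     except ImportError:
#         tomli = ensure_tomli_available()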

# Install tomli at startup
print("Databend Meta Log Collector")
print("============================")
tomli = ensure_tomli_available()
print()


def parse_config(config_file):
    """Parse TOML config file and extract log directory."""
    try:
        with open(config_file, 'rb') as f:
            config = tomli.load(f)

        # Check for log directory in two possible locations:
        # 1. Top-level: log_dir = "..."
        # 2. In [log.file] section: dir = "..."
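        #
        # For example, either of these (values hypothetical) selects the same
        # directory:
        #
        #     log_dir = "./.databend/logs"
        #
        #     [log.file]
        #     dir = "./.databend/logs"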
55
+
56
+ top_level_log_dir = config .get ('log_dir' )
57
+
58
+ log_config = config .get ('log' , {})
59
+ file_config = log_config .get ('file' , {})
60
+ nested_log_dir = file_config .get ('dir' )
61
+
62
+ print (f"Top-level log_dir: { top_level_log_dir } " )
63
+ print (f"[log.file].dir: { nested_log_dir } " )
64
+
65
+ # Validate configuration
66
+ if top_level_log_dir and nested_log_dir :
67
+ if top_level_log_dir != nested_log_dir :
68
+ raise ValueError (
69
+ f"Conflicting log directory settings found:\n "
70
+ f" log_dir = '{ top_level_log_dir } '\n "
71
+ f" [log.file].dir = '{ nested_log_dir } '\n "
72
+ f"Please use only one log directory configuration."
73
+ )
74
+ print ("✓ Both log directory settings present and match" )
75
+ return top_level_log_dir
76
+ elif top_level_log_dir :
77
+ print ("✓ Using top-level log_dir setting" )
78
+ return top_level_log_dir
79
+ elif nested_log_dir :
80
+ print ("✓ Using [log.file].dir setting" )
81
+ return nested_log_dir
82
+ else :
83
+ raise ValueError (
84
+ "No log directory found in config file. "
85
+ "Please set either 'log_dir' or '[log.file].dir'"
86
+ )
87
+
88
+ except FileNotFoundError :
89
+ raise FileNotFoundError (f"Config file '{ config_file } ' not found" )
90
+ except Exception as e :
91
+ raise ValueError (f"Error parsing config file: { e } " )


def resolve_log_dir(log_dir, config_file):
    """Resolve log directory path (handle relative paths)."""
    log_path = Path(log_dir)

    if not log_path.is_absolute():
        # Resolve relative to config file directory
        config_dir = Path(config_file).parent
        log_path = config_dir / log_path

    return log_path.resolve()
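
# For example (paths hypothetical): with config file /etc/databend/meta-1.toml
# and log_dir = "logs", the archive source resolves to /etc/databend/logs.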


def analyze_log_directory(log_dir):
    """Analyze log directory and return detailed information."""
    print("=== Analyzing log directory ===")
    print(f"Log directory path: {log_dir}")
    print(f"Directory exists: {log_dir.exists()}")

    if not log_dir.exists():
        print("✗ Log directory does not exist")
        return None

    if not log_dir.is_dir():
        print("✗ Path is not a directory")
        return None

    print(f"Directory readable: {os.access(log_dir, os.R_OK)}")

    # Get directory contents
    try:
        all_items = list(log_dir.iterdir())
        files = [f for f in all_items if f.is_file()]
        dirs = [d for d in all_items if d.is_dir()]

        print(f"Total items: {len(all_items)}")
        print(f"Files: {len(files)}")
        print(f"Subdirectories: {len(dirs)}")

        if files:
            print("Files found:")
            for f in files[:10]:  # Show first 10 files
                size = f.stat().st_size
                mtime = datetime.fromtimestamp(f.stat().st_mtime).strftime('%Y-%m-%d %H:%M:%S')
                print(f"  - {f.name} ({size} bytes, modified: {mtime})")
            if len(files) > 10:
                print(f"  ... and {len(files) - 10} more files")

        if dirs:
            print("Subdirectories found:")
            for d in dirs[:5]:  # Show first 5 directories
                print(f"  - {d.name}/")
            if len(dirs) > 5:
                print(f"  ... and {len(dirs) - 5} more directories")

        return {'files': files, 'dirs': dirs, 'total': len(all_items)}

    except PermissionError:
        print("✗ Permission denied accessing directory")
        return None
    except Exception as e:
        print(f"✗ Error accessing directory: {e}")
        return None
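
# Note the implicit contract: create_log_archive below consumes the returned
# dict {'files': [...], 'dirs': [...], 'total': int}; None means "unusable".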


def create_log_archive(log_dir, output_file):
    """Create tar.gz archive of all files in log directory."""
    analysis = analyze_log_directory(log_dir)
    if analysis is None:
        raise FileNotFoundError(f"Cannot access log directory '{log_dir}'")

    print("\n=== Creating archive ===")
    print(f"Output file: {output_file}")

    files = analysis['files']
    dirs = analysis['dirs']

    with tarfile.open(output_file, 'w:gz') as tar:
        if files or dirs:
            for file_path in files:
                print(f"Adding file: {file_path.name}")
                # arcname=<basename> keeps entries at the archive root
                tar.add(file_path, arcname=file_path.name)
            for dir_path in dirs:
                print(f"Adding directory: {dir_path.name}/")
                tar.add(dir_path, arcname=dir_path.name)
        else:
            print("No files to archive")

    return analysis['total']
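
# To sanity-check an archive afterwards (filename hypothetical):
#
#     with tarfile.open("node1_logs.tar.gz", "r:gz") as tar:
#         print(tar.getnames())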


def main():
    parser = argparse.ArgumentParser(
        description='Collect databend-meta logs based on config file',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='Example: %(prog)s databend-meta-node-1.toml'
    )
    parser.add_argument('config_file', help='Path to databend-meta config file (.toml)')
    parser.add_argument('-o', '--output', help='Output archive filename (default: auto-generated)')

    args = parser.parse_args()
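
    # Typical invocations (script name and output filename hypothetical):
    #
    #     ./collect_meta_logs.py databend-meta-node-1.toml
    #     ./collect_meta_logs.py databend-meta-node-1.toml -o node1_logs.tar.gz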

    try:
        # Parse config and extract log directory
        print("=== Processing config file ===")
        config_abs_path = Path(args.config_file).resolve()
        print(f"Config file absolute path: {config_abs_path}")
        print(f"Config file exists: {config_abs_path.exists()}")
        print(f"Config file readable: {os.access(config_abs_path, os.R_OK)}")

        log_dir_str = parse_config(config_abs_path)
        print(f"Found log directory setting: '{log_dir_str}'")

        # Resolve log directory path
        log_dir = resolve_log_dir(log_dir_str, config_abs_path)
        print(f"Resolved log directory: {log_dir}")

        # Generate output filename if not provided
        print("\n=== Preparing output ===")
        if args.output:
            output_file = args.output
            print(f"Using provided output filename: {output_file}")
        else:
            config_name = Path(args.config_file).stem
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = f"{config_name}_logs_{timestamp}.tar.gz"
            print(f"Generated output filename: {output_file}")

        output_abs_path = Path(output_file).resolve()
        print(f"Output absolute path: {output_abs_path}")
        print(f"Output directory writable: {os.access(output_abs_path.parent, os.W_OK)}")

        # Create log archive
        file_count = create_log_archive(log_dir, output_file)

        print("\n=== Archive completed ===")
        if file_count > 0:
            file_size = os.path.getsize(output_file)
            size_mb = file_size / (1024 * 1024)
            print(f"✓ Successfully created log archive: {output_file}")
            print(f"✓ Items archived: {file_count}")
            print(f"✓ Archive size: {size_mb:.2f} MB")
        else:
            print(f"⚠ Warning: Log directory '{log_dir}' is empty")
            print(f"✓ Created empty archive: {output_file}")

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()