Skip to content

Commit db4a1bf

Browse files
feat: Add pilot logging (legacy and DiracX)
1 parent ff988d4 commit db4a1bf

File tree

3 files changed

+134
-74
lines changed

3 files changed

+134
-74
lines changed

Pilot/dirac-pilot.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -65,25 +65,34 @@
6565
# print the buffer, so we have a "classic" logger back in sync.
6666
sys.stdout.write(bufContent)
6767
# now the remote logger.
68-
remote = pilotParams.pilotLogging and (pilotParams.loggerURL is not None)
69-
if remote:
68+
remote = pilotParams.pilotLogging and pilotParams.diracXServer
69+
if remote and pilotParams.jwt != {}:
7070
# In a remote logger enabled Dirac version we would have some classic logger content from a wrapper,
7171
# which we passed in:
7272
receivedContent = ""
7373
if not sys.stdin.isatty():
7474
receivedContent = sys.stdin.read()
75+
7576
log = RemoteLogger(
76-
pilotParams.loggerURL,
77+
pilotParams.diracXServer,
7778
"Pilot",
7879
bufsize=pilotParams.loggerBufsize,
7980
pilotUUID=pilotParams.pilotUUID,
8081
debugFlag=pilotParams.debugFlag,
81-
wnVO=pilotParams.wnVO,
82+
jwt=pilotParams.jwt,
83+
legacy_logging=pilotParams.isLegacyLogging,
84+
clientID=pilotParams.clientID
8285
)
8386
log.info("Remote logger activated")
84-
log.buffer.write(receivedContent)
87+
log.buffer.write(log.format_to_json(
88+
"INFO",
89+
receivedContent,
90+
))
8591
log.buffer.flush()
86-
log.buffer.write(bufContent)
92+
log.buffer.write(log.format_to_json(
93+
"INFO",
94+
bufContent,
95+
))
8796
else:
8897
log = Logger("Pilot", debugFlag=pilotParams.debugFlag)
8998

@@ -106,7 +115,7 @@
106115

107116
log.info("Executing commands: %s" % str(pilotParams.commands))
108117

109-
if remote:
118+
if remote and pilotParams.jwt:
110119
# It's safer to cancel the timer here. Each command has got its own logger object with a timer cancelled by the
111120
# finaliser. No need for a timer in the "else" code segment below.
112121
try:
@@ -124,13 +133,16 @@
124133
log.error("Command %s could not be instantiated" % commandName)
125134
# send the last message and abandon ship.
126135
if remote:
127-
log.buffer.flush()
136+
log.buffer.flush(force=True)
128137
sys.exit(-1)
129-
138+
130139
log.info("Pilot tasks finished.")
131140

132141
if pilotParams.jwt:
133-
if not pilotParams.isLegacyPilot:
142+
if remote:
143+
log.buffer.flush(force=True)
144+
145+
if not pilotParams.isLegacyLogging:
134146
log.info("Revoking pilot token.")
135147
revokePilotToken(
136148
pilotParams.diracXServer,

Pilot/pilotCommands.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ def __init__(self, pilotParams):
2828
import sys
2929
import time
3030
import traceback
31-
import subprocess
3231
from collections import Counter
3332

3433
############################
@@ -44,7 +43,6 @@ def __init__(self, pilotParams):
4443
from shlex import quote
4544
except ImportError:
4645
from pipes import quote
47-
4846
try:
4947
from Pilot.pilotTools import (
5048
CommandBase,
@@ -92,16 +90,20 @@ def wrapper(self):
9290
self.log.info(
9391
"Flushing the remote logger buffer for pilot on sys.exit(): %s (exit code:%s)" % (pRef, str(exCode))
9492
)
95-
self.log.buffer.flush() # flush the buffer unconditionally (on sys.exit()).
93+
9694
try:
97-
sendMessage(self.log.url, self.log.pilotUUID, self.log.wnVO, "finaliseLogs", {"retCode": str(exCode)})
95+
self.log.error(str(exCode))
96+
self.log.error(traceback.format_exc())
97+
self.log.buffer.flush(force=True)
9898
except Exception as exc:
9999
self.log.error("Remote logger couldn't be finalised %s " % str(exc))
100+
100101
raise
101102
except Exception as exc:
102103
# unexpected exit: document it and bail out.
103104
self.log.error(str(exc))
104105
self.log.error(traceback.format_exc())
106+
self.log.buffer.flush(force=True)
105107
raise
106108
finally:
107109
self.log.buffer.cancelTimer()
@@ -132,7 +134,6 @@ def __init__(self, pilotParams):
132134
@logFinalizer
133135
def execute(self):
134136
"""Get host and local user info, and other basic checks, e.g. space available"""
135-
136137
self.log.info("Uname = %s" % " ".join(os.uname()))
137138
self.log.info("Host Name = %s" % socket.gethostname())
138139
self.log.info("Host FQDN = %s" % socket.getfqdn())
@@ -1126,8 +1127,6 @@ def execute(self):
11261127
self.__setInnerCEOpts()
11271128
self.__startJobAgent()
11281129

1129-
sys.exit(0)
1130-
11311130

11321131
class NagiosProbes(CommandBase):
11331132
"""Run one or more Nagios probe scripts that follow the Nagios Plugin API:

0 commit comments

Comments
 (0)