Skip to content

Commit 9351a9a

Browse files
committed
Add support for rpm2extents transcoder
Two related parts: 1. If the `LIBREPO_TRANSCODE_RPMS` environment variable is set to a program (with parameters), then downloads are piped through it. 2. Transcoded RPMs will, by definition, not have the same bits on disk as were downloaded. This is inherent. The transcoder is tasked with measuring the bits that enter stdin and storing a copy of the digest(s) seen in the footer. `librepo` can then use these stored digests instead if the environment variable is set. This is part of the changes described in https://fedoraproject.org/wiki/Changes/RPMCoW
1 parent a0752e3 commit 9351a9a

File tree

3 files changed

+254
-4
lines changed

3 files changed

+254
-4
lines changed

librepo/checksum.c

Lines changed: 108 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
#define BUFFER_SIZE 2048
3939
#define MAX_CHECKSUM_NAME_LEN 7
4040

41+
/* magic value at end of file (64 bits) that indicates this is a transcoded rpm */
42+
#define MAGIC 3472329499408095051
43+
4144
LrChecksumType
4245
lr_checksum_type(const char *type)
4346
{
@@ -101,6 +104,100 @@ lr_checksum_type_to_str(LrChecksumType type)
101104
return NULL;
102105
}
103106

107+
char *
108+
lr_checksum_cow_fd(LrChecksumType type, int fd, GError **err)
109+
{
110+
struct __attribute__ ((__packed__)) csum_offset_magic {
111+
off64_t csum_offset;
112+
uint64_t magic;
113+
};
114+
struct __attribute__ ((__packed__)) orig_size_algos_len {
115+
ssize_t orig_size;
116+
uint32_t algos_len;
117+
};
118+
struct __attribute__ ((__packed__)) algo_len_digest_len {
119+
uint32_t algo_len;
120+
uint32_t digest_len;
121+
};
122+
123+
struct csum_offset_magic csum_offset_magic;
124+
struct orig_size_algos_len orig_size_algos_len;
125+
struct algo_len_digest_len algo_len_digest_len;
126+
char *algo, *checksum;
127+
unsigned char *digest;
128+
size_t len = sizeof(csum_offset_magic);
129+
130+
if (g_getenv("LIBREPO_TRANSCODE_RPMS") == NULL) {
131+
g_debug("Transcoding not enabled, skipping path");
132+
return NULL;
133+
}
134+
if (lseek(fd, -len, SEEK_END) == -1) {
135+
g_warning("seek for transcode failed, probably too small");
136+
return NULL;
137+
}
138+
if (read(fd, &csum_offset_magic, len) != len) {
139+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
140+
"Cannot read csum_offset, magic. size = %lu", len);
141+
return NULL;
142+
}
143+
if (csum_offset_magic.magic != MAGIC) {
144+
g_debug("Not transcoded");
145+
return NULL;
146+
}
147+
g_debug("Is transcoded");
148+
if (lseek(fd, csum_offset_magic.csum_offset, SEEK_SET) == -1) {
149+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
150+
"seek for transcode csum_offset failed");
151+
return NULL;
152+
}
153+
len = sizeof(orig_size_algos_len);
154+
if (read(fd, &orig_size_algos_len, len) != len) {
155+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
156+
"Cannot read orig_size_algos_len");
157+
return NULL;
158+
}
159+
while (orig_size_algos_len.algos_len > 0) {
160+
len = sizeof(algo_len_digest_len);
161+
if (read(fd, &algo_len_digest_len, len) != len) {
162+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
163+
"Cannot read algo_len_digest_len");
164+
return NULL;
165+
}
166+
167+
len = algo_len_digest_len.algo_len;
168+
algo = lr_malloc0(len + 1);
169+
if (read(fd, algo, len) != len) {
170+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
171+
"Cannot read algo");
172+
lr_free(algo);
173+
return NULL;
174+
}
175+
len = algo_len_digest_len.digest_len;
176+
digest = lr_malloc0(len);
177+
if (read(fd, digest, len) != len) {
178+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
179+
"Cannot read digest");
180+
lr_free(algo);
181+
lr_free(digest);
182+
return NULL;
183+
}
184+
if (lr_checksum_type(algo) == type) {
185+
/* found it, do the same as lr_checksum_fd does */
186+
checksum = lr_malloc0(sizeof(char) * (len * 2 + 1));
187+
for (size_t x = 0; x < len; x++) {
188+
sprintf(checksum+(x*2), "%02x", digest[x]);
189+
}
190+
lr_free(algo);
191+
lr_free(digest);
192+
return checksum;
193+
}
194+
lr_free(algo);
195+
lr_free(digest);
196+
orig_size_algos_len.algos_len--;
197+
}
198+
return NULL;
199+
}
200+
104201
char *
105202
lr_checksum_fd(LrChecksumType type, int fd, GError **err)
106203
{
@@ -244,9 +341,17 @@ lr_checksum_fd_compare(LrChecksumType type,
244341
}
245342
}
246343

247-
checksum = lr_checksum_fd(type, fd, err);
248-
if (!checksum)
249-
return FALSE;
344+
checksum = lr_checksum_cow_fd(type, fd, err);
345+
if (checksum) {
346+
// if checksum is found in CoW package, do not cache it in xattr
347+
// because looking this up is nearly constant time (cheap) but
348+
// is not valid when CoW is not enabled in RPM.
349+
caching = FALSE;
350+
} else {
351+
checksum = lr_checksum_fd(type, fd, err);
352+
if (!checksum)
353+
return FALSE;
354+
}
250355

251356
*matches = (strcmp(expected, checksum)) ? FALSE : TRUE;
252357

librepo/downloader.c

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <sys/types.h>
3333
#include <sys/stat.h>
3434
#include <sys/time.h>
35+
#include <sys/wait.h>
3536
#include <sys/xattr.h>
3637
#include <fcntl.h>
3738
#include <curl/curl.h>
@@ -150,6 +151,10 @@ typedef struct {
150151
FILE *f; /*!<
151152
fdopened file descriptor from LrDownloadTarget and used
152153
in curl_handle. */
154+
FILE *writef; /*!<
155+
the fd to write data to. Could be a subprocess. */
156+
pid_t pid; /*!<
157+
the pid of a transcoder. */
153158
char errorbuffer[CURL_ERROR_SIZE]; /*!<
154159
Error buffer used in curl handle */
155160
GSList *tried_mirrors; /*!<
@@ -613,7 +618,7 @@ lr_writecb(char *ptr, size_t size, size_t nmemb, void *userdata)
613618
if (range_start <= 0 && range_end <= 0) {
614619
// Write everything curl give to you
615620
target->writecb_recieved += all;
616-
return fwrite(ptr, size, nmemb, target->f);
621+
return fwrite(ptr, size, nmemb, target->writef);
617622
}
618623

619624
/* Deal with situation when user wants only specific byte range of the
@@ -1443,6 +1448,136 @@ open_target_file(LrTarget *target, GError **err)
14431448
return f;
14441449
}
14451450

1451+
/** Maybe transcode the file
1452+
*/
1453+
void
1454+
maybe_transcode(LrTarget *target, GError **err)
1455+
{
1456+
const char *e = g_getenv("LIBREPO_TRANSCODE_RPMS");
1457+
int transcoder_stdin[2], fd;
1458+
pid_t pid;
1459+
FILE *out;
1460+
_cleanup_strv_free_ gchar **args = NULL;
1461+
target->writef = NULL;
1462+
if (!e) {
1463+
g_debug("Not transcoding");
1464+
target->writef = target->f;
1465+
return;
1466+
}
1467+
if (g_str_has_suffix(target->target->path, ".rpm") == FALSE) {
1468+
g_debug("Not transcoding %s due to name", target->target->path);
1469+
target->writef = target->f;
1470+
return;
1471+
}
1472+
g_debug("Transcoding %s", target->target->path);
1473+
args = g_strsplit(e, " ", -1);
1474+
if (args[0] == NULL) {
1475+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1476+
"transcode env empty");
1477+
return;
1478+
}
1479+
if (pipe(transcoder_stdin) != 0) {
1480+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1481+
"input pipe creation failed: %s",
1482+
g_strerror(errno));
1483+
return;
1484+
}
1485+
/** librepo collects the 'write' ends of the pipes. We must mark these as
1486+
* FD_CLOEXEC so a second download/transcode does not inherit them and
1487+
* hold them open, as it'll prevent an EOF and cause a deadlock.
1488+
*/
1489+
if (fcntl(transcoder_stdin[1], F_SETFD, FD_CLOEXEC) != 0) {
1490+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1491+
"input pipe write close-on-fork failed: %s",
1492+
g_strerror(errno));
1493+
return;
1494+
}
1495+
pid = fork();
1496+
if (pid == -1) {
1497+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1498+
"fork failed: %s",
1499+
g_strerror(errno));
1500+
return;
1501+
}
1502+
if (pid == 0) {
1503+
/* child */
1504+
if (dup2(transcoder_stdin[0], STDIN_FILENO) == -1) {
1505+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1506+
"dup2 of stdin failed: %s",
1507+
g_strerror(errno));
1508+
return;
1509+
}
1510+
close(transcoder_stdin[0]);
1511+
close(transcoder_stdin[1]);
1512+
fd = fileno(target->f);
1513+
if (fd == -1) {
1514+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1515+
"fileno for target failed");
1516+
return;
1517+
}
1518+
if (dup2(fd, STDOUT_FILENO) == -1) {
1519+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1520+
"dup2 of stdout failed: %s",
1521+
g_strerror(errno));
1522+
return;
1523+
}
1524+
if (execv(args[0], args) == -1) {
1525+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1526+
"execv failed: %s", g_strerror(errno));
1527+
}
1528+
/* we never get here, but appease static analysis */
1529+
return;
1530+
} else {
1531+
/* parent */
1532+
close(transcoder_stdin[0]);
1533+
out = fdopen(transcoder_stdin[1], "w");
1534+
if (out == NULL) {
1535+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1536+
"fdopen failed: %s",
1537+
g_strerror(errno));
1538+
return;
1539+
}
1540+
target->pid = pid;
1541+
target->writef = out;
1542+
/* resuming a transcode is not yet implemented */
1543+
target->resume = FALSE;
1544+
}
1545+
}
1546+
1547+
void
1548+
cleanup_transcode(LrTarget *target, GError **err)
1549+
{
1550+
int wstatus, trc;
1551+
if (!target->writef) {
1552+
return;
1553+
}
1554+
if (target->writef == target->f) {
1555+
return;
1556+
}
1557+
fclose(target->writef);
1558+
if(waitpid(target->pid, &wstatus, 0) == -1) {
1559+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1560+
"transcode waitpid failed: %s", g_strerror(errno));
1561+
} else if (WIFEXITED(wstatus)) {
1562+
trc = WEXITSTATUS(wstatus);
1563+
if (trc != 0) {
1564+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1565+
"transcode process non-zero exit code %d", trc);
1566+
}
1567+
} else if (WIFSIGNALED(wstatus)) {
1568+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1569+
"transcode process was terminated with a signal: %d",
1570+
WTERMSIG(wstatus));
1571+
} else {
1572+
/* don't think this can happen, but covering all bases */
1573+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1574+
"transcode unhandled circumstance in waitpid");
1575+
}
1576+
target->writef = NULL;
1577+
/* pid is only valid if writef is not NULL */
1578+
/* target->pid = -1; */
1579+
}
1580+
14461581
/** Prepare next transfer
14471582
*/
14481583
static gboolean
@@ -1524,6 +1659,9 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err)
15241659
target->f = open_target_file(target, err);
15251660
if (!target->f)
15261661
goto fail;
1662+
maybe_transcode(target, err);
1663+
if (!target->writef)
1664+
goto fail;
15271665
target->writecb_recieved = 0;
15281666
target->writecb_required_range_written = FALSE;
15291667

@@ -1699,6 +1837,7 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err)
16991837
curl_easy_cleanup(target->curl_handle);
17001838
target->curl_handle = NULL;
17011839
}
1840+
cleanup_transcode(target, err);
17021841
if (target->f != NULL) {
17031842
fclose(target->f);
17041843
target->f = NULL;
@@ -2269,6 +2408,8 @@ check_transfer_statuses(LrDownload *dd, GError **err)
22692408
if (transfer_err) // Transfer was unsuccessful
22702409
goto transfer_error;
22712410

2411+
cleanup_transcode(target, err);
2412+
22722413
//
22732414
// Checksum checking
22742415
//
@@ -2358,6 +2499,7 @@ check_transfer_statuses(LrDownload *dd, GError **err)
23582499
target->curl_handle = NULL;
23592500
g_free(target->headercb_interrupt_reason);
23602501
target->headercb_interrupt_reason = NULL;
2502+
cleanup_transcode(target, err);
23612503
fclose(target->f);
23622504
target->f = NULL;
23632505
if (target->curl_rqheaders) {
@@ -2761,6 +2903,7 @@ lr_download(GSList *targets,
27612903
curl_multi_remove_handle(dd.multi_handle, target->curl_handle);
27622904
curl_easy_cleanup(target->curl_handle);
27632905
target->curl_handle = NULL;
2906+
cleanup_transcode(target, err);
27642907
fclose(target->f);
27652908
target->f = NULL;
27662909
g_free(target->headercb_interrupt_reason);

librepo/rcodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ typedef enum {
125125
key/group not found, ...) */
126126
LRE_ZCK, /*!<
127127
(41) Zchunk error (error reading zchunk file, ...) */
128+
LRE_TRANSCODE, /*!<
129+
(42) Transcode error (env empty, ...) */
128130
LRE_UNKNOWNERROR, /*!<
129131
(xx) unknown error - sentinel of error codes enum */
130132
} LrRc; /*!< Return codes */

0 commit comments

Comments
 (0)