Skip to content

Commit a74ef8f

Browse files
committed
Add support for rpm2extents transcoder
Two related parts: 1. If the `LIBREPO_TRANSCODE_RPMS` environment variable is set to a program (with parameters), downloads are piped through it. 2. Transcoded RPMs will, by definition, not have the same bits on disk as were downloaded; this is inherent. The transcoder is tasked with measuring the bits that enter its stdin and storing a copy of the digest(s) it sees in the footer. `librepo` can then use these stored digests instead when the environment variable is set. This is part of the changes described in https://fedoraproject.org/wiki/Changes/RPMCoW
1 parent 365115b commit a74ef8f

File tree

3 files changed

+264
-4
lines changed

3 files changed

+264
-4
lines changed

librepo/checksum.c

Lines changed: 122 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
#define BUFFER_SIZE 2048
3939
#define MAX_CHECKSUM_NAME_LEN 7
4040

41+
/* Magic value stored in the last 64 bits of a file to mark it as a
 * transcoded rpm. The value equals 3472329499408095051; laid out in
 * little-endian byte order it is the ASCII sequence "KWTSH100".
 * Explicitly unsigned so it compares cleanly against uint64_t fields. */
#define MAGIC 0x303031485354574BULL
43+
4144
LrChecksumType
4245
lr_checksum_type(const char *type)
4346
{
@@ -101,6 +104,114 @@ lr_checksum_type_to_str(LrChecksumType type)
101104
return NULL;
102105
}
103106

107+
char *
108+
lr_checksum_cow_fd(LrChecksumType type, int fd, GError **err)
109+
{
110+
struct __attribute__ ((__packed__)) csum_offset_magic {
111+
off64_t csum_offset;
112+
uint64_t magic;
113+
};
114+
struct __attribute__ ((__packed__)) orig_size_algos_len {
115+
ssize_t orig_size;
116+
uint32_t algos_len;
117+
};
118+
struct __attribute__ ((__packed__)) algo_len_digest_len {
119+
uint32_t algo_len;
120+
uint32_t digest_len;
121+
};
122+
123+
struct csum_offset_magic csum_offset_magic;
124+
struct orig_size_algos_len orig_size_algos_len;
125+
struct algo_len_digest_len algo_len_digest_len;
126+
char *algo, *checksum;
127+
unsigned char *digest;
128+
size_t len = sizeof(csum_offset_magic);
129+
130+
if (g_getenv("LIBREPO_TRANSCODE_RPMS") == NULL) {
131+
g_debug("Transcoding not enabled, skipping path");
132+
return NULL;
133+
}
134+
if (lseek(fd, -len, SEEK_END) == -1) {
135+
g_warning("seek for transcode failed, probably too small");
136+
return NULL;
137+
}
138+
if (read(fd, &csum_offset_magic, len) != len) {
139+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
140+
"Cannot read csum_offset, magic. size = %lu", len);
141+
return NULL;
142+
}
143+
if (csum_offset_magic.magic != MAGIC) {
144+
g_debug("Not transcoded");
145+
return NULL;
146+
}
147+
g_debug("Is transcoded");
148+
if (lseek(fd, csum_offset_magic.csum_offset, SEEK_SET) == -1) {
149+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
150+
"seek for transcode csum_offset failed");
151+
return NULL;
152+
}
153+
len = sizeof(orig_size_algos_len);
154+
if (read(fd, &orig_size_algos_len, len) != len) {
155+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
156+
"Cannot read orig_size_algos_len");
157+
return NULL;
158+
}
159+
while (orig_size_algos_len.algos_len > 0) {
160+
len = sizeof(algo_len_digest_len);
161+
if (read(fd, &algo_len_digest_len, len) != len) {
162+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
163+
"Cannot read algo_len_digest_len");
164+
return NULL;
165+
}
166+
167+
len = algo_len_digest_len.algo_len;
168+
algo = lr_malloc0(len + 1);
169+
if (algo == NULL) {
170+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
171+
"Cannot allocate %lu for algo", len + 1);
172+
return NULL;
173+
174+
}
175+
if (read(fd, algo, len) != len) {
176+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
177+
"Cannot read algo");
178+
free(algo);
179+
return NULL;
180+
}
181+
len = algo_len_digest_len.digest_len;
182+
digest = lr_malloc0(len);
183+
if (digest == NULL) {
184+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
185+
"Cannot allocate %lu for digest", len);
186+
free(algo);
187+
free(digest);
188+
return NULL;
189+
190+
}
191+
if (read(fd, digest, len) != len) {
192+
g_set_error(err, LR_CHECKSUM_ERROR, LRE_TRANSCODE,
193+
"Cannot read digest");
194+
free(algo);
195+
free(digest);
196+
return NULL;
197+
}
198+
if (lr_checksum_type(algo) == type) {
199+
/* found it, do the same as lr_checksum_fd does */
200+
checksum = lr_malloc0(sizeof(char) * (len * 2 + 1));
201+
for (size_t x = 0; x < len; x++) {
202+
sprintf(checksum+(x*2), "%02x", digest[x]);
203+
}
204+
free(algo);
205+
free(digest);
206+
return checksum;
207+
}
208+
free(algo);
209+
free(digest);
210+
orig_size_algos_len.algos_len--;
211+
}
212+
return NULL;
213+
}
214+
104215
char *
105216
lr_checksum_fd(LrChecksumType type, int fd, GError **err)
106217
{
@@ -244,9 +355,17 @@ lr_checksum_fd_compare(LrChecksumType type,
244355
}
245356
}
246357

247-
checksum = lr_checksum_fd(type, fd, err);
248-
if (!checksum)
249-
return FALSE;
358+
checksum = lr_checksum_cow_fd(type, fd, err);
359+
if (checksum) {
360+
// if checksum is found in CoW package, do not cache it in xattr
361+
// because looking this up is nearly constant time (cheap) but
362+
// is not valid when CoW is not enabled in RPM.
363+
caching = FALSE;
364+
} else {
365+
checksum = lr_checksum_fd(type, fd, err);
366+
if (!checksum)
367+
return FALSE;
368+
}
250369

251370
*matches = (strcmp(expected, checksum)) ? FALSE : TRUE;
252371

librepo/downloader.c

Lines changed: 140 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <sys/types.h>
3333
#include <sys/stat.h>
3434
#include <sys/time.h>
35+
#include <sys/wait.h>
3536
#include <sys/xattr.h>
3637
#include <fcntl.h>
3738
#include <curl/curl.h>
@@ -150,6 +151,10 @@ typedef struct {
150151
FILE *f; /*!<
151152
fdopened file descriptor from LrDownloadTarget and used
152153
in curl_handle. */
154+
FILE *writef; /*!<
155+
the fd to write data to. Could be a subprocess. */
156+
pid_t pid; /*!<
157+
the pid of a transcoder. */
153158
char errorbuffer[CURL_ERROR_SIZE]; /*!<
154159
Error buffer used in curl handle */
155160
GSList *tried_mirrors; /*!<
@@ -613,7 +618,7 @@ lr_writecb(char *ptr, size_t size, size_t nmemb, void *userdata)
613618
if (range_start <= 0 && range_end <= 0) {
614619
// Write everything curl give to you
615620
target->writecb_recieved += all;
616-
return fwrite(ptr, size, nmemb, target->f);
621+
return fwrite(ptr, size, nmemb, target->writef);
617622
}
618623

619624
/* Deal with situation when user wants only specific byte range of the
@@ -1443,6 +1448,132 @@ open_target_file(LrTarget *target, GError **err)
14431448
return f;
14441449
}
14451450

1451+
/** Maybe transcode the file
1452+
*/
1453+
void
1454+
maybe_transcode(LrTarget *target, GError **err)
1455+
{
1456+
const char *e = g_getenv("LIBREPO_TRANSCODE_RPMS");
1457+
int transcoder_stdin[2], fd;
1458+
pid_t pid;
1459+
FILE *out;
1460+
_cleanup_strv_free_ gchar **args = NULL;
1461+
target->writef = NULL;
1462+
if (!e) {
1463+
g_debug("Not transcoding");
1464+
target->writef = target->f;
1465+
return;
1466+
}
1467+
if (g_str_has_suffix(target->target->path, ".rpm") == FALSE) {
1468+
g_debug("Not transcoding %s due to name", target->target->path);
1469+
target->writef = target->f;
1470+
return;
1471+
}
1472+
g_debug("Transcoding %s", target->target->path);
1473+
args = g_strsplit(e, " ", -1);
1474+
if (args[0] == NULL) {
1475+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1476+
"transcode env empty");
1477+
return;
1478+
}
1479+
if (pipe(transcoder_stdin) != 0) {
1480+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1481+
"input pipe creation failed: %s",
1482+
g_strerror(errno));
1483+
return;
1484+
}
1485+
if (fcntl(transcoder_stdin[1], F_SETFD, FD_CLOEXEC) != 0) {
1486+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1487+
"input pipe write close-on-fork failed: %s",
1488+
g_strerror(errno));
1489+
return;
1490+
}
1491+
pid = fork();
1492+
if (pid == -1) {
1493+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1494+
"fork failed: %s",
1495+
g_strerror(errno));
1496+
return;
1497+
}
1498+
if (pid == 0) {
1499+
/* child */
1500+
if (dup2(transcoder_stdin[0], STDIN_FILENO) == -1) {
1501+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1502+
"dup2 of stdin failed: %s",
1503+
g_strerror(errno));
1504+
return;
1505+
}
1506+
close(transcoder_stdin[0]);
1507+
close(transcoder_stdin[1]);
1508+
fd = fileno(target->f);
1509+
if (fd == -1) {
1510+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1511+
"fileno for target failed");
1512+
return;
1513+
}
1514+
if (dup2(fd, STDOUT_FILENO) == -1) {
1515+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1516+
"dup2 of stdout failed: %s",
1517+
g_strerror(errno));
1518+
return;
1519+
}
1520+
if (execv(args[0], args) == -1) {
1521+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1522+
"execv failed: %s", g_strerror(errno));
1523+
}
1524+
/* we never get here, but appease static analysis */
1525+
return;
1526+
} else {
1527+
/* parent */
1528+
close(transcoder_stdin[0]);
1529+
out = fdopen(transcoder_stdin[1], "w");
1530+
if (out == NULL) {
1531+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1532+
"fdopen failed: %s",
1533+
g_strerror(errno));
1534+
return;
1535+
}
1536+
target->pid = pid;
1537+
target->writef = out;
1538+
/* resuming a transcode is not yet implemented */
1539+
target->resume = FALSE;
1540+
}
1541+
}
1542+
1543+
void
1544+
cleanup_transcode(LrTarget *target, GError **err)
1545+
{
1546+
int wstatus, trc;
1547+
if (!target->writef) {
1548+
return;
1549+
}
1550+
if (target->writef == target->f) {
1551+
return;
1552+
}
1553+
fclose(target->writef);
1554+
if(waitpid(target->pid, &wstatus, 0) == -1) {
1555+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1556+
"transcode waitpid failed: %s", g_strerror(errno));
1557+
} else if (WIFEXITED(wstatus)) {
1558+
trc = WEXITSTATUS(wstatus);
1559+
if (trc != 0) {
1560+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1561+
"transcode process non-zero exit code %d", trc);
1562+
}
1563+
} else if (WIFSIGNALED(wstatus)) {
1564+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1565+
"transcode process was terminated with a signal: %d",
1566+
WTERMSIG(wstatus));
1567+
} else {
1568+
/* don't think this can happen, but covering all bases */
1569+
g_set_error(err, LR_DOWNLOADER_ERROR, LRE_TRANSCODE,
1570+
"transcode unhandled circumstance in waitpid");
1571+
}
1572+
target->writef = NULL;
1573+
/* pid is only valid if writef is not NULL */
1574+
/* target->pid = -1; */
1575+
}
1576+
14461577
/** Prepare next transfer
14471578
*/
14481579
static gboolean
@@ -1524,6 +1655,9 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err)
15241655
target->f = open_target_file(target, err);
15251656
if (!target->f)
15261657
goto fail;
1658+
maybe_transcode(target, err);
1659+
if (!target->writef)
1660+
goto fail;
15271661
target->writecb_recieved = 0;
15281662
target->writecb_required_range_written = FALSE;
15291663

@@ -1699,6 +1833,7 @@ prepare_next_transfer(LrDownload *dd, gboolean *candidatefound, GError **err)
16991833
curl_easy_cleanup(target->curl_handle);
17001834
target->curl_handle = NULL;
17011835
}
1836+
cleanup_transcode(target, err);
17021837
if (target->f != NULL) {
17031838
fclose(target->f);
17041839
target->f = NULL;
@@ -2269,6 +2404,8 @@ check_transfer_statuses(LrDownload *dd, GError **err)
22692404
if (transfer_err) // Transfer was unsuccessful
22702405
goto transfer_error;
22712406

2407+
cleanup_transcode(target, err);
2408+
22722409
//
22732410
// Checksum checking
22742411
//
@@ -2358,6 +2495,7 @@ check_transfer_statuses(LrDownload *dd, GError **err)
23582495
target->curl_handle = NULL;
23592496
g_free(target->headercb_interrupt_reason);
23602497
target->headercb_interrupt_reason = NULL;
2498+
cleanup_transcode(target, err);
23612499
fclose(target->f);
23622500
target->f = NULL;
23632501
if (target->curl_rqheaders) {
@@ -2761,6 +2899,7 @@ lr_download(GSList *targets,
27612899
curl_multi_remove_handle(dd.multi_handle, target->curl_handle);
27622900
curl_easy_cleanup(target->curl_handle);
27632901
target->curl_handle = NULL;
2902+
cleanup_transcode(target, err);
27642903
fclose(target->f);
27652904
target->f = NULL;
27662905
g_free(target->headercb_interrupt_reason);

librepo/rcodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ typedef enum {
125125
key/group not found, ...) */
126126
LRE_ZCK, /*!<
127127
(41) Zchunk error (error reading zchunk file, ...) */
128+
LRE_TRANSCODE, /*!<
129+
(42) Transcode error (env empty, ...) */
128130
LRE_UNKNOWNERROR, /*!<
129131
(xx) unknown error - sentinel of error codes enum */
130132
} LrRc; /*!< Return codes */

0 commit comments

Comments
 (0)