Skip to content

Commit 554b66b

Browse files
AnderGkolayne
and committed
Fix docker cp on a mounted volume
This is a work around docker's bug, see moby/moby#47964. Resolves #18 Co-authored-by: Nikolai Nechaev <[email protected]>
1 parent b92632a commit 554b66b

File tree

3 files changed

+129
-12
lines changed

3 files changed

+129
-12
lines changed

activemount.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
)
6+
7+
// activeMount counts how many concurrent mounts of a volume a single
// container holds (keyed by mount/request ID). More than one mount can
// exist at once, e.g. when `docker cp` is run against a container that
// already has the volume mounted. Such requests are supposed to carry
// unique mount IDs but, due to a dockerd bug, they don't:
// https://github.com/moby/moby/issues/47964
type activeMount struct {
	UsageCount int // number of active mounts sharing this mount ID
}

// mustMarshal serializes the activeMount to JSON, panicking on failure.
// Marshalling a struct containing only an int cannot realistically fail,
// so a panic here indicates a programmer error (Must- naming convention).
func (am *activeMount) mustMarshal() []byte {
	data, err := json.Marshal(am)
	if err != nil {
		panic(err)
	}
	return data
}

driver.go

Lines changed: 72 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package main
22

33
import (
4+
"encoding/json"
45
"errors"
56
"fmt"
67
"io"
@@ -166,9 +167,11 @@ func (d *DockerOnTop) Remove(request *volume.RemoveRequest) error {
166167
} else if os.IsNotExist(err) {
167168
// That's the default case: mountpoint does not exist
168169
} else if err != nil {
170+
log.Errorf("Volume %s to be removed does not seem mounted but the mountpoint exists and cannot be removed: %v",
171+
request.Name, err)
169172
log.Errorf("Volume %s to be removed does not seem mounted but still cannot be removed: %v",
170173
request.Name, err)
171-
return internalError("failed to remove the mountpoint directory of a seemingly unmounted volume",
174+
return internalError("failed to remove the mountpoint directory of a volume, which seems unmounted",
172175
err)
173176
}
174177

@@ -310,22 +313,47 @@ func (d *DockerOnTop) activateVolume(volumeName string, requestId string, active
310313
return internalError("failed to list activemounts/", err)
311314
}
312315

316+
var activeMountInfo activeMount
313317
activemountFilePath := d.activemountsdir(volumeName) + requestId
314-
f, err := os.Create(activemountFilePath)
318+
payload, err := os.ReadFile(activemountFilePath)
315319
if err == nil {
316-
// We don't care about the file's contents
317-
_ = f.Close()
318-
} else if os.IsExist(err) {
319-
// Super weird. I can't imagine why this would happen.
320-
log.Warningf("Active mount %s already exists (but it shouldn't...)", activemountFilePath)
320+
// The file may exist from a previous mount when doing a docker cp on an already running
321+
// container. Thus, no need to mount, just increment the counter.
322+
err = json.Unmarshal(payload, &activeMountInfo)
323+
if err != nil {
324+
log.Errorf("Failed to decode active mount file %s : %v", activemountFilePath, err)
325+
return internalError("failed to decode active mount file", err)
326+
}
327+
} else if os.IsNotExist(err) {
328+
// Default case, we need to create a new active mount
329+
activeMountInfo = activeMount{UsageCount: 0}
321330
} else {
331+
log.Errorf("Failed to read active mount file %s : %v", activemountFilePath, err)
332+
return internalError("failed to read active mount file", err)
333+
}
334+
335+
activeMountInfo.UsageCount++
336+
337+
// Here, intentionally separating file creation and writing (instead of using `os.WriteFile`)
338+
// to perform more careful error handling
339+
activemountFile, err := os.Create(activemountFilePath)
340+
if err != nil {
322341
// We have successfully mounted the overlay but failed to mark that we are using it.
323342
// If we use the volume now, we break the guarantee that we shall provide according
324343
// to the above note. Thus, refusing with an error.
325344
// We leave the overlay mounted as a harmless side effect.
326345
log.Errorf("While mounting volume %s, failed to create active mount file: %v", volumeName, err)
327346
return internalError("failed to create active mount file while mounting volume", err)
328347
}
348+
defer activemountFile.Close()
349+
350+
_, err = activemountFile.Write(activeMountInfo.mustMarshal())
351+
if err != nil {
352+
// This is an unfortunate situation. We have either created or truncated the active
353+
// mounts file, rendering it unreadable on future requests. There is not much we can do.
354+
log.Errorf("Failed to write to the active mount file %s : %v", activemountFilePath, err)
355+
return internalError("failed to write to the active mount file", err)
356+
}
329357

330358
return nil
331359
}
@@ -347,15 +375,47 @@ func (d *DockerOnTop) deactivateVolume(volumeName string, requestId string, acti
347375

348376
activemountFilePath := d.activemountsdir(volumeName) + requestId
349377

350-
err := os.Remove(activemountFilePath)
378+
var activeMountInfo activeMount
379+
payload, err := os.ReadFile(activemountFilePath)
351380
if os.IsNotExist(err) {
352-
log.Warningf("Failed to remove %s because it does not exist (but it should...)", activemountFilePath)
381+
log.Warningf("Failed to read&remove %s because it does not exist (but it should...)", activemountFilePath)
382+
// Assuming we are the only user with this mount ID
383+
activeMountInfo = activeMount{UsageCount: 1}
353384
} else if err != nil {
354-
log.Errorf("Failed to remove active mounts file %s. The volume %s is now stuck in the active state",
385+
log.Errorf("Failed to read active mounts file %s . The volume %s is now stuck in the active state",
355386
activemountFilePath, volumeName)
356-
// The user most likely won't see this error message due to daemon not showing unmount errors to the
387+
// The user most likely won't see this error message because daemon does not show unmount errors to the
357388
// `docker run` clients :((
358-
return internalError("failed to remove the active mount file; the volume is now stuck in the active state", err)
389+
return internalError("failed to read the active mount file; the volume is now stuck in the active state", err)
390+
} else {
391+
err = json.Unmarshal(payload, &activeMountInfo)
392+
if err != nil {
393+
log.Errorf("Failed to decode active mount file %s : %v", activemountFilePath, err)
394+
return internalError("failed to decode active mount file", err)
395+
}
396+
}
397+
398+
activeMountInfo.UsageCount--
399+
if activeMountInfo.UsageCount == 0 {
400+
err = os.Remove(activemountFilePath)
401+
if os.IsNotExist(err) {
402+
// It's ok, already logged above
403+
} else if err != nil {
404+
log.Errorf("Failed to remove active mounts file %s : %v; the volume is now stuck in the active state",
405+
activemountFilePath, err)
406+
// The user most likely won't see this error message because the daemon does not show unmount errors
407+
// to the `docker run` clients :((
408+
return internalError("failed to remove the active mount file; the volume is now stuck in the active state", err)
409+
}
410+
} else {
411+
err = os.WriteFile(activemountFilePath, activeMountInfo.mustMarshal(), 0o644)
412+
if err != nil {
413+
log.Errorf("Failed to write to active mount file %s : %v; the file may have been corrupted", activemountFilePath, err)
414+
return internalError("failed to write to active mount file (potentially corrupting)", err)
415+
}
416+
log.Debugf("Volume %s is still used by the same container. Indicating success without unmounting",
417+
volumeName)
418+
return nil
359419
}
360420

361421
_, err = activemountsdir.ReadDir(1) // Check if there is any container using the volume (after us)

tests/docker_cp.bats

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env bats

# Provides the `CONTAINER_CMD_*` helpers.
load common.sh

@test "Docker cp-ing from a container" {
    # Regression test for https://github.com/kolayne/docker-on-top/issues/18

    BASE="$(mktemp --directory)"
    NAME="$(basename "$BASE")"
    docker volume create --driver docker-on-top "$NAME" -o base="$BASE" -o volatile=true

    # Deferred cleanup: remove the scratch dir, the container, and the volume.
    trap 'rm -rf "$BASE"; docker container rm -f "$CONTAINER_ID"; docker volume rm "$NAME"; trap - RETURN' RETURN

    CONTAINER_ID=$(docker run --name "$NAME" -d -v "$NAME":/dot alpine:latest sh -e -c '
        echo 123 > /dot/b
        sleep 1
        # What the following checks is that after the second container is started, the overlay upperdir
        # is not cleared (which is what was happening before the issue was fixed). However, there is no
        # reliable way to test this, since what exactly happens in the erroneous case depends on the
        # implementation of the overlay filesystem.
        # For me, this seems to work (but just `cat /dot/b` does not fail):
        ls /dot | grep b && cat /dot/b
    ')

    sleep 0.5

    # It does not matter where we copy to, but putting it on the volume
    # simplifies the cleanup.
    docker cp "$NAME":/etc/passwd "$BASE/"

    docker run --rm -v "$NAME":/dot alpine:latest true

    # The first container must have exited successfully, i.e. /dot/b survived.
    [ 0 -eq "$(docker wait "$CONTAINER_ID")" ]
}

0 commit comments

Comments
 (0)