From d656f83c87fc698e8a023b39dbfa6418943a2045 Mon Sep 17 00:00:00 2001 From: Frost Ming Date: Tue, 22 Jul 2025 20:03:42 +0800 Subject: [PATCH] fix: download directories with http fs Signed-off-by: Frost Ming --- fsspec/implementations/http.py | 10 +++++++--- fsspec/implementations/tests/test_http.py | 10 ++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index ea8d79d46..cd81822ce 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -1,10 +1,11 @@ import asyncio import io import logging +import os import re import weakref from copy import copy -from urllib.parse import urlparse +from urllib.parse import unquote, urlparse import aiohttp import yarl @@ -156,7 +157,7 @@ async def _ls_real(self, url, detail=True, **kwargs): kw.update(kwargs) logger.debug(url) session = await self.set_session() - async with session.get(self.encode_url(url), **self.kwargs) as r: + async with session.get(self.encode_url(url), **kw) as r: self._raise_not_found_for_status(r, url) if "Content-Type" in r.headers: @@ -253,6 +254,9 @@ async def _get_file( kw.update(kwargs) logger.debug(rpath) session = await self.set_session() + if await self._isdir(rpath): + os.makedirs(unquote(lpath), exist_ok=True) + return async with session.get(self.encode_url(rpath), **kw) as r: try: size = int(r.headers["content-length"]) @@ -264,7 +268,7 @@ async def _get_file( if isfilelike(lpath): outfile = lpath else: - outfile = open(lpath, "wb") # noqa: ASYNC230 + outfile = open(unquote(lpath), "wb") # noqa: ASYNC230 try: chunk = True diff --git a/fsspec/implementations/tests/test_http.py b/fsspec/implementations/tests/test_http.py index d014d1155..23c98fafc 100644 --- a/fsspec/implementations/tests/test_http.py +++ b/fsspec/implementations/tests/test_http.py @@ -319,6 +319,16 @@ def test_download(server, tmpdir): assert open(fn, "rb").read() == data +def test_download_dir(server, tmpdir): + h = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "}) + url = server.address + "/index/" + fn = os.path.join(tmpdir, "adir") + h.get(url, fn, recursive=True) + assert os.path.exists(fn) + assert os.path.exists(os.path.join(fn, "realfile")) + assert open(os.path.join(fn, "realfile"), "rb").read() == data + + def test_multi_download(server, tmpdir): h = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "}) urla = server.realfile