|
4 | 4 | import csv
|
5 | 5 | import json
|
6 | 6 | import os
|
| 7 | +import tempfile |
| 8 | +from pathlib import Path |
7 | 9 | from typing import TYPE_CHECKING
|
8 | 10 |
|
9 | 11 | if TYPE_CHECKING:
|
10 | 12 | from collections.abc import AsyncIterator
|
11 |
| - from pathlib import Path |
12 | 13 | from typing import Any, TextIO
|
13 | 14 |
|
14 | 15 | from typing_extensions import Unpack
|
15 | 16 |
|
16 | 17 | from crawlee._types import ExportDataCsvKwargs, ExportDataJsonKwargs
|
17 | 18 |
|
18 | 19 |
|
19 |
| -METADATA_FILENAME = '__metadata__.json' |
20 |
| -"""The name of the metadata file for storage clients.""" |
21 |
| - |
22 |
| - |
23 | 20 | def infer_mime_type(value: Any) -> str:
|
24 | 21 | """Infer the MIME content type from the value.
|
25 | 22 |
|
@@ -58,26 +55,55 @@ async def json_dumps(obj: Any) -> str:
|
58 | 55 |
|
59 | 56 |
|
60 | 57 | async def atomic_write_text(path: Path, data: str) -> None:
|
61 |
| - tmp = path.with_suffix(path.suffix + '.tmp') |
62 |
| - # write to .tmp |
63 |
| - await asyncio.to_thread(tmp.write_text, data, encoding='utf-8') |
| 58 | + dir_path = path.parent |
| 59 | + |
| 60 | + def _sync_write_text() -> str: |
| 61 | + # create a temp file in the target dir, return its name |
| 62 | + fd, tmp_path = tempfile.mkstemp( |
| 63 | + suffix=path.suffix, |
| 64 | + prefix=f'{path.name}.', |
| 65 | + dir=str(dir_path), |
| 66 | + ) |
| 67 | + try: |
| 68 | + with os.fdopen(fd, 'w', encoding='utf-8') as tmp_file: |
| 69 | + tmp_file.write(data) |
| 70 | + except: |
| 71 | + Path(tmp_path).unlink(missing_ok=True) |
| 72 | + raise |
| 73 | + return tmp_path |
| 74 | + |
| 75 | + tmp_path = await asyncio.to_thread(_sync_write_text) |
64 | 76 |
|
65 | 77 | try:
|
66 |
| - await asyncio.to_thread(os.replace, tmp, path) |
67 |
| - except FileNotFoundError: |
68 |
| - # If the .tmp vanished, fall back to a straight write |
| 78 | + await asyncio.to_thread(os.replace, tmp_path, str(path)) |
| 79 | + except (FileNotFoundError, PermissionError): |
| 80 | + # fallback if tmp went missing |
69 | 81 | await asyncio.to_thread(path.write_text, data, encoding='utf-8')
|
70 | 82 |
|
71 | 83 |
|
72 | 84 | async def atomic_write_bytes(path: Path, data: bytes) -> None:
|
73 |
| - tmp = path.with_suffix(path.suffix + '.tmp') |
74 |
| - # write to .tmp |
75 |
| - await asyncio.to_thread(tmp.write_bytes, data) |
| 85 | + dir_path = path.parent |
| 86 | + |
| 87 | + def _sync_write_bytes() -> str: |
| 88 | + fd, tmp_path = tempfile.mkstemp( |
| 89 | + suffix=path.suffix, |
| 90 | + prefix=f'{path.name}.', |
| 91 | + dir=str(dir_path), |
| 92 | + ) |
| 93 | + try: |
| 94 | + with os.fdopen(fd, 'wb') as tmp_file: |
| 95 | + tmp_file.write(data) |
| 96 | + except: |
| 97 | + Path(tmp_path).unlink(missing_ok=True) |
| 98 | + raise |
| 99 | + return tmp_path |
| 100 | + |
| 101 | + tmp_path = await asyncio.to_thread(_sync_write_bytes) |
76 | 102 |
|
77 | 103 | try:
|
78 |
| - await asyncio.to_thread(os.replace, tmp, path) |
79 |
| - except FileNotFoundError: |
80 |
| - # If the .tmp vanished, fall back to a straight write |
| 104 | + await asyncio.to_thread(os.replace, tmp_path, str(path)) |
| 105 | + except (FileNotFoundError, PermissionError): |
| 106 | + # fallback if tmp went missing |
81 | 107 | await asyncio.to_thread(path.write_bytes, data)
|
82 | 108 |
|
83 | 109 |
|
|
0 commit comments