Skip to content

Commit fc92b48

Browse files
authored
support plugins for alternative FsAccess (#233)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <[email protected]>
1 parent feeb0da commit fc92b48

File tree

5 files changed

+166
-13
lines changed

5 files changed

+166
-13
lines changed

README.rst

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,57 @@ Once you upload JSON file to a server, you make a badge by using a link like htt
9191
Here is an example of markdown to add a badge::
9292

9393
![test result](https://flat.badgen.net/https/path/to/generated/json?icon=commonwl)
94+
95+
*************************
96+
Custom file access module
97+
*************************
98+
99+
If your CWL implementation does not write output files to a local file
100+
system location but instead to some other remote storage system, you
101+
can provide an alternate implementation of the *StdFsAccess* object
102+
that is able to access your storage system.
103+
104+
Step 1:
105+
106+
Implement your own class with the same public interface as the
107+
*StdFsAccess* object in *cwltest/stdfsaccess.py* (as of this writing,
108+
the methods are *open*, *size*, *isfile* and *isdir*). These methods
109+
should expect to be called with URIs from the *location* field of the
110+
outputs of test cases.
111+
112+
Define a function that, when called, returns a new instance of your object.
113+
114+
Step 2:
115+
116+
Create a Python package containing your class (or add it to an
117+
existing one).
118+
119+
In the package metadata, add an entry point that declares the module
120+
(in this example, *my_cwl_runner.fsaccess*) containing the function
121+
(in this example, *get_fsaccess*) that *cwltest* will invoke to get an
122+
object implementing the *StdFsAccess* interface.
123+
124+
In *setup.py* this looks like:
125+
126+
.. code:: python
127+
128+
setup(
129+
...
130+
entry_points={"cwltest.fsaccess": ["fsaccess=my_cwl_runner.fsaccess:get_fsaccess"]},
131+
...
132+
)
133+
134+
In *pyproject.toml* it looks like:
135+
136+
.. code::
137+
138+
[project.entry-points.'cwltest.fsaccess']
139+
fsaccess = 'my_cwl_runner.fsaccess:get_fsaccess'
140+
141+
142+
Step 3:
143+
144+
Install your package in the same Python environment as the
145+
installation of *cwltest*. When invoked, *cwltest* will query Python
146+
package metadata for a package with the *cwltest.fsaccess* entry point
147+
and call it to get back a custom filesystem access object.

cwltest/compare.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
import hashlib
44
import json
5-
import os.path
6-
import urllib.parse
75
from typing import Any, Callable, Dict, Optional, Set
6+
import cwltest.stdfsaccess
7+
8+
fs_access = cwltest.stdfsaccess.StdFsAccess("")
89

910

1011
class CompareFail(Exception):
@@ -130,13 +131,14 @@ def _compare_location(
130131
actual_comp = "path"
131132
else:
132133
actual_comp = "location"
133-
path = urllib.parse.urlparse(actual[actual_comp]).path
134+
134135
if actual.get("class") == "Directory":
135136
actual[actual_comp] = actual[actual_comp].rstrip("/")
136-
exist_fun: Callable[[str], bool] = os.path.isdir
137+
exist_fun: Callable[[str], bool] = fs_access.isdir
137138
else:
138-
exist_fun = os.path.isfile
139-
if not exist_fun(path) and not skip_details:
139+
exist_fun = fs_access.isfile
140+
141+
if not exist_fun(actual[actual_comp]) and not skip_details:
140142
raise CompareFail.format(
141143
expected,
142144
actual,
@@ -160,15 +162,17 @@ def _compare_location(
160162

161163
def _compare_checksum(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
162164
if "path" in actual:
163-
path = urllib.parse.urlparse(actual["path"]).path
165+
path = actual["path"]
164166
else:
165-
path = urllib.parse.urlparse(actual["location"]).path
167+
path = actual["location"]
166168
checksum = hashlib.sha1() # nosec
167-
with open(path, "rb") as f:
169+
170+
with fs_access.open(path, "rb") as f:
168171
contents = f.read(1024 * 1024)
169172
while contents != b"":
170173
checksum.update(contents)
171174
contents = f.read(1024 * 1024)
175+
172176
actual_checksum_on_disk = f"sha1${checksum.hexdigest()}"
173177
if "checksum" in actual:
174178
actual_checksum_declared = actual["checksum"]
@@ -193,10 +197,12 @@ def _compare_checksum(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
193197

194198
def _compare_size(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
195199
if "path" in actual:
196-
path = urllib.parse.urlparse(actual["path"]).path
200+
path = actual["path"]
197201
else:
198-
path = urllib.parse.urlparse(actual["location"]).path
199-
actual_size_on_disk = os.path.getsize(path)
202+
path = actual["location"]
203+
204+
actual_size_on_disk = fs_access.size(path)
205+
200206
if "size" in actual:
201207
actual_size_declared = actual["size"]
202208
if actual_size_on_disk != actual_size_declared:

cwltest/main.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,12 @@
1313
import schema_salad.ref_resolver
1414
import schema_salad.schema
1515
from cwltest.argparser import arg_parser
16-
from cwltest.utils import CWLTestConfig, CWLTestReport, TestResult
16+
from cwltest.utils import (
17+
CWLTestConfig,
18+
CWLTestReport,
19+
TestResult,
20+
load_optional_fsaccess_plugin,
21+
)
1722
from schema_salad.exceptions import ValidationException
1823

1924
from cwltest import logger, utils
@@ -116,6 +121,8 @@ def main() -> int:
116121
suite_name, _ = os.path.splitext(os.path.basename(args.test))
117122
report: Optional[junit_xml.TestSuite] = junit_xml.TestSuite(suite_name, [])
118123

124+
load_optional_fsaccess_plugin()
125+
119126
ntotal: Dict[str, int] = Counter()
120127
npassed: Dict[str, List[CWLTestReport]] = defaultdict(list)
121128

cwltest/stdfsaccess.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""Abstracted IO access."""
2+
3+
import os
4+
import urllib
5+
from typing import IO, Any
6+
7+
from schema_salad.ref_resolver import uri_file_path
8+
9+
10+
def abspath(src: str, basedir: str) -> str:
    """
    Resolve *src* against *basedir*, handling both plain paths and URIs.

    :param src: a plain path, a ``file://`` URI or an ``http(s)://`` URI.
    :param basedir: base used for relative plain paths; either a local
        directory path or a ``file://`` URI.
    :returns:
        * for a ``file://`` *src*: the result of ``uri_file_path(src)``;
        * for an ``http``/``https`` *src*: *src* unchanged;
        * for an absolute plain path: *src* unchanged;
        * for a relative plain path: *src* appended to *basedir*. When
          *basedir* is a ``file://`` URI the two are joined with ``"/"``,
          so the result is still a ``file://`` URI string, not a local
          path; otherwise ``os.path.join`` is used.
    """
    # ``import urllib`` alone does not guarantee that the ``urllib.parse``
    # submodule has been loaded; import it explicitly so this function does
    # not depend on some other module having imported it first.
    import urllib.parse

    if src.startswith("file://"):
        return uri_file_path(src)
    if urllib.parse.urlsplit(src).scheme in ("http", "https"):
        # Remote resources are passed through untouched.
        return src
    if os.path.isabs(src):
        return src
    if basedir.startswith("file://"):
        # Keep URI semantics: always join with a forward slash.
        return basedir + "/" + src
    return os.path.join(basedir, src)
22+
23+
24+
class StdFsAccess:
    """File access object backed by the local filesystem."""

    def __init__(self, basedir: str) -> None:
        """Store the base directory that names are resolved against."""
        self.basedir = basedir

    def _abs(self, p: str) -> str:
        """Resolve *p* against ``self.basedir`` using ``abspath``."""
        resolved = abspath(p, self.basedir)
        return resolved

    def open(self, fn: str, mode: str) -> IO[Any]:
        """Open the resource named by *fn* in the given *mode*."""
        target = self._abs(fn)
        return open(target, mode)

    def size(self, fn: str) -> int:
        """Return the size in bytes of the resource named by *fn*."""
        target = self._abs(fn)
        return os.path.getsize(target)

    def isfile(self, fn: str) -> bool:
        """Tell whether the resource named by *fn* is a regular file."""
        target = self._abs(fn)
        return os.path.isfile(target)

    def isdir(self, fn: str) -> bool:
        """Tell whether the resource named by *fn* is a directory."""
        target = self._abs(fn)
        return os.path.isdir(target)

cwltest/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
import schema_salad.avro
2828
import schema_salad.ref_resolver
2929
import schema_salad.schema
30+
import cwltest.compare
31+
import cwltest.stdfsaccess
3032
from cwltest.compare import CompareFail, compare
3133
from rdflib import Graph
3234
from ruamel.yaml.scalarstring import ScalarString
@@ -37,6 +39,10 @@
3739
else:
3840
from importlib_resources import as_file, files
3941

42+
# available since Python 3.8 (minimum version supported as of this
43+
# writing) so we don't need to fuss with backports
44+
from importlib.metadata import entry_points, EntryPoint
45+
4046
from cwltest import REQUIRED, UNSUPPORTED_FEATURE, logger, templock
4147

4248
__all__ = ["files", "as_file"]
@@ -659,3 +665,35 @@ def absuri(path: str) -> str:
659665
if "://" in path:
660666
return path
661667
return "file://" + os.path.abspath(path)
668+
669+
670+
def load_optional_fsaccess_plugin() -> None:
    """
    Load optional fsaccess plugin.

    Look for installed packages that declare an entry point in the
    ``cwltest.fsaccess`` group. If at least one is found, the first one is
    loaded and called with no arguments, and the object it returns replaces
    ``cwltest.compare.fs_access`` so that it is used for checking test
    output. If none is found, ``cwltest.compare.fs_access`` is left as is.
    A warning is logged when more than one such entry point is installed.
    """
    fsaccess_eps: List[EntryPoint]

    all_eps = entry_points()
    if hasattr(all_eps, "select"):
        # Python 3.10+: select by group explicitly. From Python 3.12 on,
        # indexing the result with a string looks up an entry point by
        # *name* rather than by group, so ``entry_points()[group]`` would
        # never find the plugin there.
        fsaccess_eps = list(all_eps.select(group="cwltest.fsaccess"))  # type: ignore [attr-defined, unused-ignore]
    else:
        # Python 3.8 / 3.9: a plain mapping of group name to entry points.
        fsaccess_eps = list(all_eps.get("cwltest.fsaccess", []))  # type: ignore [attr-defined, unused-ignore]

    if not fsaccess_eps:
        return

    if len(fsaccess_eps) > 1:
        # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
        logger.warning(
            "More than one cwltest.fsaccess entry point found, selected %s",
            fsaccess_eps[0],
        )

    # The entry point resolves to a factory; call it to get the access object.
    cwltest.compare.fs_access = fsaccess_eps[0].load()()

0 commit comments

Comments
 (0)