Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 76 additions & 32 deletions jsonschema/resolve.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,35 @@ type Resolved struct {
root *Schema
// map from $ids to their schemas
resolvedURIs map[string]*Schema
// map from schemas to additional info computed during resolution
resolvedInfo map[*Schema]*resolvedInfo
}

// resolvedInfo holds information specific to a schema that is computed by [Schema.Resolve].
// Values of this type are stored in the Resolved.resolvedInfo map, keyed by
// schema pointer, rather than in the Schema itself.
type resolvedInfo struct {
// The schema that this info describes.
s *Schema
// The schema's base schema.
// If the schema is the root or has an ID, its base is itself.
// Otherwise, its base is the innermost enclosing schema whose base
// is itself.
// Intuitively, a base schema is one that can be referred to with a
// fragmentless URI.
base *Schema
// The URI for the schema, if it is the root or has an ID.
// Otherwise nil.
// Invariants, writing info(x) for the resolvedInfo of schema x:
// info(base).uri != nil.
// base == s <=> uri != nil
uri *url.URL
// The schema to which Ref refers.
// Set only when the schema's Ref is non-empty.
resolvedRef *Schema

// If the schema has a dynamic ref, exactly one of the next two fields
// will be non-zero after successful resolution.
// The schema to which the dynamic ref refers when it acts lexically
// (that is, when resolving the dynamic ref yields no fragment).
resolvedDynamicRef *Schema
// The anchor to look up on the stack when the dynamic ref acts dynamically.
// This is the fragment produced by resolving the dynamic ref; it is
// resolved at validation time.
dynamicRefAnchor string
}

// Schema returns the schema that was resolved.
Expand Down Expand Up @@ -59,6 +88,8 @@ type ResolveOptions struct {
// Resolve resolves all references within the schema and performs other tasks that
// prepare the schema for validation.
// If opts is nil, the default values are used.
// The schema must not be changed after Resolve is called.
// The same schema may be resolved multiple times.
func (root *Schema) Resolve(opts *ResolveOptions) (*Resolved, error) {
// There are up to five steps required to prepare a schema to validate.
// 1. Load: read the schema from somewhere and unmarshal it.
Expand All @@ -71,9 +102,6 @@ func (root *Schema) Resolve(opts *ResolveOptions) (*Resolved, error) {
// in a map from URIs to schemas within root.
// 4. Resolve references: all refs in the schemas are replaced with the schema they refer to.
// 5. (Optional.) If opts.ValidateDefaults is true, validate the defaults.
if root.path != "" {
return nil, fmt.Errorf("jsonschema: Resolve: %s already resolved", root)
}
r := &resolver{loaded: map[string]*Resolved{}}
if opts != nil {
r.opts = *opts
Expand Down Expand Up @@ -125,16 +153,16 @@ func (r *resolver) resolve(s *Schema, baseURI *url.URL) (*Resolved, error) {
return nil, err
}

m, err := resolveURIs(s, baseURI)
if err != nil {
rs := &Resolved{root: s}
if err := resolveURIs(rs, baseURI); err != nil {
return nil, err
}
rs := &Resolved{root: s, resolvedURIs: m}

// Remember the schema by both the URI we loaded it from and its canonical name,
// which may differ if the schema has an $id.
// We must set the map before calling resolveRefs, or ref cycles will cause unbounded recursion.
r.loaded[baseURI.String()] = rs
r.loaded[s.uri.String()] = rs
r.loaded[rs.resolvedInfo[s].uri.String()] = rs

if err := r.resolveRefs(rs); err != nil {
return nil, err
Expand All @@ -161,6 +189,10 @@ func (root *Schema) check() error {
// checkStructure verifies that root and its subschemas form a tree.
// It also assigns each schema a unique path, to improve error messages.
func (root *Schema) checkStructure() error {
if root.path != "" {
// We have done this before, and it will always produce the same result.
return nil
}
var check func(reflect.Value, []byte) error
check = func(v reflect.Value, path []byte) error {
// For the purpose of error messages, the root schema has path "root"
Expand All @@ -175,10 +207,9 @@ func (root *Schema) checkStructure() error {
}
if s.path != "" {
// We've seen s before.
// The schema graph at root is not a tree, but it needs to
// be because we assume a unique parent when we store a schema's base
// in the Schema. A cycle would also put Schema.all into an infinite
// recursion.
// The schema graph at root is not a tree, but it needs to be because
// a schema's base must be unique.
// A cycle would also put Schema.all into an infinite recursion.
return fmt.Errorf("jsonschema: schemas at %s do not form a tree; %s appears more than once (also at %s)",
root, s.path, p)
}
Expand Down Expand Up @@ -285,8 +316,6 @@ func (s *Schema) checkLocal(report func(error)) {
// to baseURI.
// See https://json-schema.org/draft/2020-12/json-schema-core#section-8.2, section
// 8.2.1.

// TODO(jba): dynamicAnchors (§8.2.2)
//
// Every schema has a base URI and a parent base URI.
//
Expand Down Expand Up @@ -316,11 +345,17 @@ func (s *Schema) checkLocal(report func(error)) {
// allOf/1 http://b.com (absolute $id; doesn't matter that it's not under the loaded URI)
// allOf/2 http://a.com/root.json (inherited from parent)
// allOf/2/not http://a.com/root.json (inherited from parent)
func resolveURIs(root *Schema, baseURI *url.URL) (map[string]*Schema, error) {
resolvedURIs := map[string]*Schema{}

func resolveURIs(rs *Resolved, baseURI *url.URL) error {
var resolve func(s, base *Schema) error
resolve = func(s, base *Schema) error {
assert(rs.resolvedInfo[base] != nil, "base resolved info not set")
info := rs.resolvedInfo[s]
if info == nil {
info = &resolvedInfo{s: s}
rs.resolvedInfo[s] = info
}
baseURI := rs.resolvedInfo[base].uri

// ids are scoped to the root.
if s.ID != "" {
// A non-empty ID establishes a new base.
Expand All @@ -332,21 +367,21 @@ func resolveURIs(root *Schema, baseURI *url.URL) (map[string]*Schema, error) {
return fmt.Errorf("$id %s must not have a fragment", s.ID)
}
// The base URI for this schema is its $id resolved against the parent base.
s.uri = base.uri.ResolveReference(idURI)
if !s.uri.IsAbs() {
return fmt.Errorf("$id %s does not resolve to an absolute URI (base is %s)", s.ID, s.base.uri)
info.uri = baseURI.ResolveReference(idURI)
if !info.uri.IsAbs() {
return fmt.Errorf("$id %s does not resolve to an absolute URI (base is %s)", s.ID, baseURI)
}
resolvedURIs[s.uri.String()] = s
rs.resolvedURIs[info.uri.String()] = s
base = s // needed for anchors
}
s.base = base
info.base = base

// Anchors and dynamic anchors are URI fragments that are scoped to their base.
// We treat them as keys in a map stored within the schema.
setAnchor := func(anchor string, dynamic bool) error {
if anchor != "" {
if _, ok := base.anchors[anchor]; ok {
return fmt.Errorf("duplicate anchor %q in %s", anchor, base.uri)
return fmt.Errorf("duplicate anchor %q in %s", anchor, baseURI)
}
if base.anchors == nil {
base.anchors = map[string]anchorInfo{}
Expand All @@ -368,28 +403,29 @@ func resolveURIs(root *Schema, baseURI *url.URL) (map[string]*Schema, error) {
}

// Set the root URI to the base for now. If the root has an $id, this will change.
root.uri = baseURI
// The original base, even if changed, is still a valid way to refer to the root.
resolvedURIs[baseURI.String()] = root
if err := resolve(root, root); err != nil {
return nil, err
rs.resolvedInfo = map[*Schema]*resolvedInfo{
rs.root: {s: rs.root, uri: baseURI},
}
return resolvedURIs, nil
// The original base, even if changed, is still a valid way to refer to the root.
rs.resolvedURIs = map[string]*Schema{baseURI.String(): rs.root}

return resolve(rs.root, rs.root)
}

// resolveRefs replaces every ref in the schemas with the schema it refers to.
// A reference that doesn't resolve within the schema may refer to some other schema
// that needs to be loaded.
func (r *resolver) resolveRefs(rs *Resolved) error {
for s := range rs.root.all() {
info := rs.resolvedInfo[s]
if s.Ref != "" {
refSchema, _, err := r.resolveRef(rs, s, s.Ref)
if err != nil {
return err
}
// Whether or not the anchor referred to by $ref fragment is dynamic,
// the ref still treats it lexically.
s.resolvedRef = refSchema
info.resolvedRef = refSchema
}
if s.DynamicRef != "" {
refSchema, frag, err := r.resolveRef(rs, s, s.DynamicRef)
Expand All @@ -399,11 +435,11 @@ func (r *resolver) resolveRefs(rs *Resolved) error {
if frag != "" {
// The dynamic ref's fragment points to a dynamic anchor.
// We must resolve the fragment at validation time.
s.dynamicRefAnchor = frag
info.dynamicRefAnchor = frag
} else {
// There is no dynamic anchor in the lexically referenced schema,
// so the dynamic ref behaves like a lexical ref.
s.resolvedDynamicRef = refSchema
info.resolvedDynamicRef = refSchema
}
}
}
Expand All @@ -417,7 +453,8 @@ func (r *resolver) resolveRef(rs *Resolved, s *Schema, ref string) (_ *Schema, d
return nil, "", err
}
// URI-resolve the ref against the current base URI to get a complete URI.
refURI = s.base.uri.ResolveReference(refURI)
base := rs.resolvedInfo[s].base
refURI = rs.resolvedInfo[base].uri.ResolveReference(refURI)
// The non-fragment part of a ref URI refers to the base URI of some schema.
// This part is the same for dynamic refs too: their non-fragment part resolves
// lexically.
Expand Down Expand Up @@ -447,6 +484,13 @@ func (r *resolver) resolveRef(rs *Resolved, s *Schema, ref string) (_ *Schema, d
}
referencedSchema = lrs.root
assert(referencedSchema != nil, "nil referenced schema")
// Copy the resolvedInfos from lrs into rs, without overwriting
// (hence we can't use maps.Insert).
for s, i := range lrs.resolvedInfo {
if rs.resolvedInfo[s] == nil {
rs.resolvedInfo[s] = i
}
}
}
}

Expand Down
9 changes: 6 additions & 3 deletions jsonschema/resolve_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,9 @@ func TestResolveURIs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
got, err := resolveURIs(root, base)
if err != nil {

rs := &Resolved{root: root}
if err := resolveURIs(rs, base); err != nil {
t.Fatal(err)
}

Expand All @@ -154,6 +155,7 @@ func TestResolveURIs(t *testing.T) {
},
}

got := rs.resolvedURIs
gotKeys := slices.Sorted(maps.Keys(got))
wantKeys := slices.Sorted(maps.Keys(wantIDs))
if !slices.Equal(gotKeys, wantKeys) {
Expand Down Expand Up @@ -193,13 +195,14 @@ func TestRefCycle(t *testing.T) {
}

rs, err := schemas["root"].Resolve(&ResolveOptions{Loader: loader})
t.Logf("%#v", rs.resolvedInfo)
if err != nil {
t.Fatal(err)
}

check := func(s *Schema, key string) {
t.Helper()
if s.resolvedRef != schemas[key] {
if rs.resolvedInfo[s].resolvedRef != schemas[key] {
t.Errorf("%s resolvedRef != schemas[%q]", s.json(), key)
}
}
Expand Down
50 changes: 10 additions & 40 deletions jsonschema/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"iter"
"maps"
"math"
"net/url"
"reflect"
"regexp"
"slices"
Expand Down Expand Up @@ -130,37 +129,13 @@ type Schema struct {
// Extra allows for additional keywords beyond those specified.
Extra map[string]any `json:"-"`

// computed fields

// This schema's base schema.
// If the schema is the root or has an ID, its base is itself.
// Otherwise, its base is the innermost enclosing schema whose base
// is itself.
// Intuitively, a base schema is one that can be referred to with a
// fragmentless URI.
base *Schema

// The URI for the schema, if it is the root or has an ID.
// Otherwise nil.
// Invariants:
// s.base.uri != nil.
// s.base == s <=> s.uri != nil
uri *url.URL
// These fields are independent of arguments to Schema.Resolve,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand how there can be any computed fields in a Schema if it is to be stateless, and a valid operand of many independent calls to Resolve.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The unexported fields are the outputs of functions on the other fields. They are cached computations.
For example, pattern is always the result of compiling Pattern.
I am assuming a Schema is not changed after Resolve, as documented.
(How to thread-protect the setting of these cached fields will be the topic of another PR.)
However, thinking about this, path is not in fact a function of the schema alone, but of its ancestors as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved everything into resolvedInfo.

I am trying to take advantage of the fact that a GitHub PR consists of multiple commits, by structuring the work like a stack of CLs. Of the two new commits, the first makes this change, and the second is just a rename. You can view them separately via the "Changes from..." dropdown on the "Files changed" tab.

// though they are computed there.

// The JSON Pointer path from the root schema to here.
// Used in errors.
path string

// The schema to which Ref refers.
resolvedRef *Schema

// If the schema has a dynamic ref, exactly one of the next two fields
// will be non-zero after successful resolution.
// The schema to which the dynamic ref refers when it acts lexically.
resolvedDynamicRef *Schema
// The anchor to look up on the stack when the dynamic ref acts dynamically.
dynamicRefAnchor string

// Map from anchors to subschemas.
anchors map[string]anchorInfo

Expand All @@ -186,27 +161,22 @@ type anchorInfo struct {

// String returns a short description of the schema.
func (s *Schema) String() string {
if s.uri != nil {
if u := s.uri.String(); u != "" {
return u
}
}
if a := cmp.Or(s.Anchor, s.DynamicAnchor); a != "" {
return fmt.Sprintf("%q, anchor %s", s.base.uri.String(), a)
return fmt.Sprintf("anchor %s", a)
}
if s.path != "" {
return s.path
}
return "<anonymous schema>"
}

// ResolvedRef returns the Schema to which this schema's $ref keyword
// refers, or nil if it doesn't have a $ref.
// It returns nil if this schema has not been resolved, meaning that
// [Schema.Resolve] was called on it or one of its ancestors.
func (s *Schema) ResolvedRef() *Schema {
return s.resolvedRef
}
// // ResolvedRef returns the Schema to which this schema's $ref keyword
// // refers, or nil if it doesn't have a $ref.
// // It returns nil if this schema has not been resolved, meaning that
// // [Schema.Resolve] was called on it or one of its ancestors.
// func (s *Schema) ResolvedRef() *Schema {
// return s.resolvedRef
// }

func (s *Schema) basicChecks() error {
if s.Type != "" && s.Types != nil {
Expand Down
Loading
Loading