Skip to content

Commit cc21a65

Browse files
committed
Streaming JSON-LD parser.
* Not event-based, but processes input in strict order (with a few exceptions). Passes 468 of 483 tests; some arcane features are not easily implemented using this approach. * Available through the `JSON::LD::Reader` interface by setting the `:stream` option to `true`.
1 parent 86b25a3 commit cc21a65

File tree

10 files changed

+864
-25
lines changed

10 files changed

+864
-25
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ JSON::LD can now be used to create a _context_ from an RDFS/OWL definition, and
1919

2020
Install with `gem install json-ld`
2121

22+
### JSON-LD Streaming Profile
23+
This gem implements an optimized streaming reader for generating RDF from large dataset dumps formatted as JSON-LD. Such documents must conform to the [JSON-LD Streaming Profile](https://w3c.github.io/json-ld-streaming/):
24+
25+
* Keys in JSON objects must be ordered so that `@context` and/or `@type`, when present, come before any other keys, in that order. This includes aliases of those keys. It is strongly encouraged that `@id` be present and come immediately after them.
26+
* JSON-LD documents can be signaled or requested in [streaming document form](https://w3c.github.io/json-ld-streaming/#dfn-streaming-document-form). The profile URI identifying the [streaming document form](https://w3c.github.io/json-ld-streaming/#dfn-streaming-document-form) is `http://www.w3.org/ns/json-ld#streaming`.
27+
2228
### MultiJson parser
2329
The [MultiJson](https://rubygems.org/gems/multi_json) gem is used for parsing JSON; this defaults to the native JSON parser, but will use a more performant parser if one is available. A specific parser can be specified by adding the `:adapter` option to any API call. See [MultiJson](https://rubygems.org/gems/multi_json) for more information.
2430

json-ld.gemspec

100644100755
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ Gem::Specification.new do |gem|
3030
gem.add_runtime_dependency 'link_header', '~> 0.0', '>= 0.0.8'
3131
gem.add_runtime_dependency 'lru_redux', '~> 1.1'
3232
gem.add_runtime_dependency 'json-canonicalization', '~> 0.2'
33-
gem.add_runtime_dependency 'htmlentities', '~> 4.3'
33+
gem.add_runtime_dependency 'json-stream', '~> 0.2'
34+
gem.add_runtime_dependency 'htmlentities', '~> 4.3'
3435
gem.add_runtime_dependency 'rack', '~> 2.0'
3536
gem.add_development_dependency 'sinatra-linkeddata','~> 3.1'
3637
gem.add_development_dependency 'jsonlint', '~> 0.3' unless is_java

lib/json/ld.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ module LD
3434
autoload :Normalize, 'json/ld/normalize'
3535
autoload :Reader, 'json/ld/reader'
3636
autoload :Resource, 'json/ld/resource'
37+
autoload :StreamingReader, 'json/ld/streaming_reader'
38+
autoload :StreamingWriter, 'json/ld/streaming_writer'
3739
autoload :VERSION, 'json/ld/version'
3840
autoload :Writer, 'json/ld/writer'
3941

@@ -143,6 +145,7 @@ class InvalidReverseValue < JsonLdError; @code = "invalid @reverse value"; end
143145
class InvalidScopedContext < JsonLdError; @code = "invalid scoped context"; end
144146
class InvalidScriptElement < JsonLdError; @code = "invalid script element"; end
145147
class InvalidSetOrListObject < JsonLdError; @code = "invalid set or list object"; end
148+
class InvalidStreamingKeyOrder < JsonLdError; @code = 'invalid streaming key order' end
146149
class InvalidTermDefinition < JsonLdError; @code = "invalid term definition"; end
147150
class InvalidBaseDirection < JsonLdError; @code = "invalid base direction"; end
148151
class InvalidTypedValue < JsonLdError; @code = "invalid typed value"; end

lib/json/ld/context.rb

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1517,8 +1517,8 @@ def expand_value(property, value, useNativeTypes: false, rdfDirection: nil, base
15171517
if value.datatype == RDF::URI(RDF.to_uri + "JSON") && processingMode('json-ld-1.1')
15181518
# Value parsed as JSON
15191519
# FIXME: MultiJson
1520-
res['@value'] = ::JSON.parse(value.object)
15211520
res['@type'] = '@json'
1521+
res['@value'] = ::JSON.parse(value.object)
15221522
elsif value.datatype.start_with?("https://www.w3.org/ns/i18n#") && rdfDirection == 'i18n-datatype' && processingMode('json-ld-1.1')
15231523
lang, dir = value.datatype.fragment.split('_')
15241524
res['@value'] = value.to_s
@@ -1534,24 +1534,23 @@ def expand_value(property, value, useNativeTypes: false, rdfDirection: nil, base
15341534
end
15351535
res['@direction'] = dir
15361536
elsif useNativeTypes && RDF_LITERAL_NATIVE_TYPES.include?(value.datatype)
1537-
res['@value'] = value.object
15381537
res['@type'] = uri(coerce(property)) if coerce(property)
1538+
res['@value'] = value.object
15391539
else
15401540
value.canonicalize! if value.datatype == RDF::XSD.double
1541-
res['@value'] = value.to_s
15421541
if coerce(property)
15431542
res['@type'] = uri(coerce(property)).to_s
15441543
elsif value.has_datatype?
15451544
res['@type'] = uri(value.datatype).to_s
15461545
elsif value.has_language? || language(property)
15471546
res['@language'] = (value.language || language(property)).to_s
1548-
# FIXME: direction
15491547
end
1548+
res['@value'] = value.to_s
15501549
end
15511550
res
15521551
else
15531552
# Otherwise, initialize result to a JSON object with an @value member whose value is set to value.
1554-
res = {'@value' => value}
1553+
res = {}
15551554

15561555
if td.type_mapping && !CONTAINERS_ID_VOCAB.include?(td.type_mapping.to_s)
15571556
res['@type'] = td.type_mapping.to_s
@@ -1562,7 +1561,7 @@ def expand_value(property, value, useNativeTypes: false, rdfDirection: nil, base
15621561
res['@direction'] = direction if direction
15631562
end
15641563

1565-
res
1564+
res.merge('@value' => value)
15661565
end
15671566

15681567
result

lib/json/ld/reader.rb

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
# -*- encoding: utf-8 -*-
2-
# frozen_string_literal: true
2+
33
module JSON::LD
44
##
55
# A JSON-LD parser in Ruby.
66
#
77
# @see https://www.w3.org/TR/json-ld11-api
88
# @author [Gregg Kellogg](http://greggkellogg.net/)
99
class Reader < RDF::Reader
10+
include StreamingReader
1011
format Format
1112

1213
##
@@ -46,6 +47,12 @@ def self.options
4647
control: :select,
4748
on: ["--rdf-direction DIR", %w(i18n-datatype compound-literal)],
4849
description: "How to serialize literal direction (i18n-datatype compound-literal)") {|arg| RDF::URI(arg)},
50+
RDF::CLI::Option.new(
51+
symbol: :stream,
52+
datatype: TrueClass,
53+
control: :checkbox,
54+
on: ["--[no-]stream"],
55+
description: "Optimize for streaming JSON-LD to RDF.") {|arg| arg},
4956
]
5057
end
5158

@@ -63,13 +70,11 @@ def initialize(input = $stdin, **options, &block)
6370
options[:base_uri] ||= options[:base]
6471
super do
6572
@options[:base] ||= base_uri.to_s if base_uri
66-
begin
67-
# Trim non-JSON stuff in script.
68-
@doc = if input.respond_to?(:read)
69-
input
70-
else
71-
StringIO.new(input.to_s.sub(%r(\A[^{\[]*)m, '').sub(%r([^}\]]*\Z)m, ''))
72-
end
73+
# Trim non-JSON stuff in script.
74+
@doc = if input.respond_to?(:read)
75+
input
76+
else
77+
StringIO.new(input.to_s.sub(%r(\A[^{\[]*)m, '').sub(%r([^}\]]*\Z)m, ''))
7378
end
7479

7580
if block_given?
@@ -85,7 +90,11 @@ def initialize(input = $stdin, **options, &block)
8590
# @private
8691
# @see RDF::Reader#each_statement
8792
def each_statement(&block)
88-
JSON::LD::API.toRdf(@doc, **@options, &block)
93+
if @options[:stream]
94+
stream_statement(&block)
95+
else
96+
API.toRdf(@doc, **@options, &block)
97+
end
8998
rescue ::JSON::ParserError, ::JSON::LD::JsonLdError => e
9099
log_fatal("Failed to parse input document: #{e.message}", exception: RDF::ReaderError)
91100
end
@@ -95,7 +104,7 @@ def each_statement(&block)
95104
# @see RDF::Reader#each_triple
96105
def each_triple(&block)
97106
if block_given?
98-
JSON::LD::API.toRdf(@doc, **@options) do |statement|
107+
each_statement do |statement|
99108
yield(*statement.to_triple)
100109
end
101110
end

0 commit comments

Comments
 (0)