From c6203647f0484a67d7f45e06896722cf9ebfb624 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Wed, 19 Mar 2025 14:42:56 +0100 Subject: [PATCH 01/23] feat: Add support for HIF format --- src/io.jl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/io.jl b/src/io.jl index 25afa9e..5e1c445 100644 --- a/src/io.jl +++ b/src/io.jl @@ -3,6 +3,7 @@ abstract type Abstract_HG_format end struct HGF_Format <: Abstract_HG_format end struct JSON_Format <: Abstract_HG_format end +struct HIF_Format <: Abstract_HG_format end """ @@ -227,3 +228,47 @@ hg_load( V = Nothing, E = Nothing) where {U <: Real} = open(io -> hg_load(io, format; T=T, D=D, V=V, E=E), fname, "r") + + + +""" + hg_load( + io::IO, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, + V = Nothing, + E = Nothing + ) where {U <: Real} + +Loads a hypergraph from a stream `io` from `HIF` format. +More info: https://github.com/pszufe/HIF-standard + +**Arguments** + +* `T` : type of weight values stored in the hypergraph's adjacency matrix +* `D` : dictionary for storing values the default is `Dict{Int, T}` +* `V` : type of values stored in the vertices of the hypergraph +* `E` : type of values stored in the edges of the hypergraph + +""" +function hq_load( + io::IO, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, + V = Nothing, + E = Nothing + ) where {U <: Real} + _ = format + + data = JSON3.read(read(io, String)) + + k = length(data.edges) + + n = length(data.nodes) + + h = Hypergraph{T, V, E, D}(n, k) + + h +end From 75208fd87727ea5483d563b6e7a280de7e090ac6 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Wed, 19 Mar 2025 16:13:15 +0100 Subject: [PATCH 02/23] add function to parse input for hg dimensions --- src/SimpleHypergraphs.jl | 3 ++- src/io.jl | 8 ++++---- src/utils.jl | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 src/utils.jl diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl index 2ea1c7a..1e836b6 100644 --- a/src/SimpleHypergraphs.jl +++ b/src/SimpleHypergraphs.jl @@ -23,7 +23,7 @@ export get_twosection_adjacency_mx, get_twosection_weighted_adjacency_mx export dual export random_model, random_kuniform_model, random_dregular_model, random_preferential_model -export Abstract_HG_format, HGF_Format, JSON_Format +export Abstract_HG_format, HGF_Format, JSON_Format, HIF_Format export hg_load, hg_save export modularity @@ -73,6 +73,7 @@ end include("hypergraph.jl") include("io.jl") +include("utils.jl") include("models/bipartite.jl") include("models/twosection.jl") diff --git a/src/io.jl b/src/io.jl index 5e1c445..859e330 100644 --- a/src/io.jl +++ b/src/io.jl @@ -252,7 +252,7 @@ More info: https://github.com/pszufe/HIF-standard * `E` : type of values stored in the edges of the hypergraph """ -function hq_load( +function hg_load( io::IO, format::HIF_Format; T::Type{U} = Bool, @@ -264,11 +264,11 @@ function hq_load( data = JSON3.read(read(io, String)) - k = length(data.edges) + dims = get_hg_dims_from_hif(data, V, E) - n = length(data.nodes) + print(dims) - h = Hypergraph{T, V, E, D}(n, k) + h = Hypergraph{T, V, E, D}(dims.num_nodes, dims.num_edges) h end diff --git a/src/utils.jl b/src/utils.jl new file mode 100644 index 0000000..2ff3e21 --- /dev/null +++ b/src/utils.jl @@ -0,0 +1,38 @@ +using JSON3 + + +struct HypergraphDimensions + num_nodes::Int + num_edges::Int +end + + +function 
get_hg_dims_from_hif(data::JSON3.Object, V, E) + num_nodes = length(get(data, "nodes", [])) + num_edges = length(get(data, "edges", [])) + + if num_nodes != 0 && num_edges != 0 + return HypergraphDimensions(num_nodes, num_edges) + end + + nodes = Set{V}() + edges = Set{E}() + + for i in 1:length(data.incidences) + inc = data.incidences[i] + + if inc.node ∉ nodes + push!(nodes, inc.node) + end + + if inc.edge ∉ edges + push!(edges, inc.edge) + end + end + + HypergraphDimensions( + length(nodes), + length(edges) + ) + +end \ No newline at end of file From 7694c4af0397872e4bd11ab5558d13b9da148d70 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Thu, 20 Mar 2025 09:58:16 +0100 Subject: [PATCH 03/23] finalize PoC for hg loading --- src/SimpleHypergraphs.jl | 1 - src/io.jl | 80 ++++++++++++++++++++++++++++++++++++---- src/utils.jl | 38 ------------------- 3 files changed, 73 insertions(+), 46 deletions(-) delete mode 100644 src/utils.jl diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl index 1e836b6..430e722 100644 --- a/src/SimpleHypergraphs.jl +++ b/src/SimpleHypergraphs.jl @@ -73,7 +73,6 @@ end include("hypergraph.jl") include("io.jl") -include("utils.jl") include("models/bipartite.jl") include("models/twosection.jl") diff --git a/src/io.jl b/src/io.jl index 859e330..0eddea8 100644 --- a/src/io.jl +++ b/src/io.jl @@ -237,8 +237,8 @@ hg_load( format::HIF_Format; T::Type{U} = Bool, D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, - V = Nothing, - E = Nothing + V::Union{Type{String}, Type{Int}} = String, + E::Union{Type{String}, Type{Int}} = String ) where {U <: Real} Loads a hypergraph from a stream `io` from `HIF` format. @@ -257,18 +257,84 @@ function hg_load( format::HIF_Format; T::Type{U} = Bool, D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V = Nothing, - E = Nothing + V::Union{Type{String}, Type{Int}} = String, + E::Union{Type{String}, Type{Int}} = String ) where {U <: Real} _ = format data = JSON3.read(read(io, String)) - dims = get_hg_dims_from_hif(data, V, E) + nodes = get(data, "nodes", []) + edges = get(data, "edges", []) + + if length(nodes) == 0 || length(edges) == 0 + node_set = Set{V}() + edge_set = Set{E}() + + for inc in data.incidences + if inc.node ∉ node_set + push!(node_set, inc.node) + push!(nodes, inc.node) + end + + if inc.edge ∉ edge_set + push!(edge_set, inc.edge) + push!(edges, inc.edge) + end + end + else + nodes = [node.node for node in nodes] + edges = [edge.edge for edge in edges] + end + + node_dict = Dict(val => i for (i, val) in pairs(nodes)) + edge_dict = Dict(val => i for (i, val) in pairs(edges)) + + n = length(nodes) + k = length(edges) - print(dims) + h = Hypergraph{T, V, E, D}(n, k, nodes, edges) - h = Hypergraph{T, V, E, D}(dims.num_nodes, dims.num_edges) + for inc in data.incidences + node_idx = node_dict[inc.node] + he_idx = edge_dict[inc.edge] + + h[node_idx, he_idx] = inc.weight + end h end + + +""" + hg_load( + fname::AbstractString; + format::Abstract_HG_format = HIF_Format(), + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, + V::Union{Type{String}, Type{Int}} = String, + E::Union{Type{String}, Type{Int}} = String + ) where {U <: Real} + ) + +Loads a hypergraph from a file `fname`. +The default saving format is `json`. 
+ +**Arguments** + +* `T` : type of weight values stored in the hypergraph's adjacency matrix +* `D` : dictionary for storing values the default is `Dict{Int, T}` +* `V` : type of values stored in the vertices of the hypergraph +* `E` : type of values stored in the edges of the hypergraph + +""" +function hg_load( + fname::AbstractString, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, + V::Union{Type{String}, Type{Int}} = String, + E::Union{Type{String}, Type{Int}} = String + ) where {U <: Real} + open(io -> hg_load(io, format; T=T, D=D, V=V, E=E), fname, "r") +end diff --git a/src/utils.jl b/src/utils.jl deleted file mode 100644 index 2ff3e21..0000000 --- a/src/utils.jl +++ /dev/null @@ -1,38 +0,0 @@ -using JSON3 - - -struct HypergraphDimensions - num_nodes::Int - num_edges::Int -end - - -function get_hg_dims_from_hif(data::JSON3.Object, V, E) - num_nodes = length(get(data, "nodes", [])) - num_edges = length(get(data, "edges", [])) - - if num_nodes != 0 && num_edges != 0 - return HypergraphDimensions(num_nodes, num_edges) - end - - nodes = Set{V}() - edges = Set{E}() - - for i in 1:length(data.incidences) - inc = data.incidences[i] - - if inc.node ∉ nodes - push!(nodes, inc.node) - end - - if inc.edge ∉ edges - push!(edges, inc.edge) - end - end - - HypergraphDimensions( - length(nodes), - length(edges) - ) - -end \ No newline at end of file From ef226323e89b5b87a975d91e48156debd15fc897 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Thu, 20 Mar 2025 10:43:12 +0100 Subject: [PATCH 04/23] add function for saving hg in the hiv format --- src/io.jl | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/io.jl b/src/io.jl index 0eddea8..3f35c1b 100644 --- a/src/io.jl +++ b/src/io.jl @@ -75,6 +75,53 @@ hg_save( open(io -> hg_save(io, h, format), fname, "w") +""" + hg_save(io::IO, h::Hypergraph, format::HIF_Format) + +Saves a hypergraph `h` to an output stream `io` in `HIF` format. + +If `h` has `Composite Types` either for vertex metadata or hyperedges metadata, +the user has to explicit tell the JSON3 package about it, for instance using: + +`JSON3.StructType(::Type{MyType}) = JSON3.Struct()`. + +See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more details. 
+ +""" +function hg_save(io::IO, h::Hypergraph, format::HIF_Format) + _ = format + + json_hg = Dict{Symbol, Any}() + + node_dict = Dict(i => val for (i, val) in pairs(h.v_meta)) + edge_dict = Dict(i => val for (i, val) in pairs(h.he_meta)) + + incidences = [] + + for node_idx in 1:length(h.v_meta) + for edge_idx in 1:length(h.he_meta) + node = node_dict[node_idx] + edge = edge_dict[edge_idx] + weight = h[node_idx, edge_idx] + + if isnothing(weight) + continue + end + + push!(incidences, Dict( + "edge" => edge, + "node" => node, + "weight" => weight + )) + end + end + + json_hg[:incidences] = incidences + + JSON3.write(io, json_hg) +end + + """ hg_load( io::IO, From 94b02b089e9652b77e98133bd7041435df22bfea Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Thu, 20 Mar 2025 13:03:04 +0100 Subject: [PATCH 05/23] add test --- src/io.jl | 35 ++++++++++++++++++++++++++++++----- test/runtests.jl | 4 ++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/io.jl b/src/io.jl index 3f35c1b..5e4fbd2 100644 --- a/src/io.jl +++ b/src/io.jl @@ -92,16 +92,38 @@ function hg_save(io::IO, h::Hypergraph, format::HIF_Format) _ = format json_hg = Dict{Symbol, Any}() + incidences = [] + v_meta = h.v_meta + he_meta = h.he_meta - node_dict = Dict(i => val for (i, val) in pairs(h.v_meta)) - edge_dict = Dict(i => val for (i, val) in pairs(h.he_meta)) + if any(isnothing, h.v_meta) + v_meta = [i for i in 1:length(h.v_meta)] + end + + if any(isnothing, h.he_meta) + he_meta = [i for i in 1:length(h.he_meta)] + end + + node_dict = Dict(i => val for (i, val) in pairs(v_meta)) + edge_dict = Dict(i => val for (i, val) in pairs(he_meta)) - incidences = [] - for node_idx in 1:length(h.v_meta) - for edge_idx in 1:length(h.he_meta) + types = collect(typeof(h).parameters) + V = types[2] + E = types[3] + + for node_idx in 1:length(v_meta) + for edge_idx in 1:length(he_meta) node = node_dict[node_idx] + if V == String + node = string(node) + end + edge = edge_dict[edge_idx] + if E == String + edge = string(edge) + end + weight = h[node_idx, edge_idx] if isnothing(weight) @@ -334,6 +356,9 @@ function hg_load( edges = [edge.edge for edge in edges] end + sort!(nodes) + sort!(edges) + node_dict = Dict(val => i for (i, val) in pairs(nodes)) edge_dict = Dict(val => i for (i, val) in pairs(edges)) diff --git a/test/runtests.jl b/test/runtests.jl index fc7fb61..a1b8591 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -73,6 +73,10 @@ h1[5,2] = 6.5 @test get_vertex_meta(h1, 1) == get_vertex_meta(loaded_hg, 1) @test get_hyperedge_meta(h1, 2) == get_hyperedge_meta(loaded_hg, 2) + hg_save(path, h1, format=HIF_Format()) + loaded_hg = hg_load(path; format=HIF_Format(), T=Float64, V=Int, E=String) + + @test h1 == loaded_hg end @test_throws ArgumentError hg_load("data/test_malformedcomment.hgf"; T=Int) From b491ee1e2839d067ca13a55d5a29fc2b382428e6 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Thu, 20 Mar 2025 14:44:02 +0100 Subject: [PATCH 06/23] move to a separate file --- src/SimpleHypergraphs.jl | 1 + src/hif_io.jl | 182 ++++++++++++++++++++++++++++++++++++++ src/io.jl | 184 --------------------------------------- 3 files changed, 183 insertions(+), 184 deletions(-) create mode 100644 src/hif_io.jl diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl index 430e722..0ad146a 100644 --- a/src/SimpleHypergraphs.jl +++ b/src/SimpleHypergraphs.jl @@ -73,6 +73,7 @@ end include("hypergraph.jl") include("io.jl") +include("hif_io.jl") include("models/bipartite.jl") include("models/twosection.jl") 
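At this point in the series the HIF reader and writer from the earlier patches are wired into the package: `HIF_Format` is exported and the new `include("hif_io.jl")` above makes them reachable through the generic `hg_save`/`hg_load` entry points in `src/io.jl`. A minimal round-trip sketch of that API, mirroring the test added in PATCH 05/23; the file name `example_hif.json` and the small hypergraph below are illustrative only, and the round trip is assumed to behave as that test expects.

```julia
using SimpleHypergraphs

# A small hypergraph: 3 vertices, 2 hyperedges, Float64 weights, no metadata.
h = Hypergraph{Float64}(3, 2)
h[1, 1] = 1.0
h[2, 1] = 2.5
h[3, 2] = 4.0

# Write it out; the file-based hg_save dispatches on the format argument.
hg_save("example_hif.json", h; format=HIF_Format())

# Read it back. T selects the weight type; V/E are the node/edge identifier
# types stored in the file (plain integer indices here, since `h` carries no metadata).
h2 = hg_load("example_hif.json", HIF_Format(); T=Float64, V=Int, E=Int)
```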
diff --git a/src/hif_io.jl b/src/hif_io.jl new file mode 100644 index 0000000..514470e --- /dev/null +++ b/src/hif_io.jl @@ -0,0 +1,182 @@ +struct HIF_Format <: Abstract_HG_format end + + +""" + hg_save(io::IO, h::Hypergraph, format::HIF_Format) + +Saves a hypergraph `h` to an output stream `io` in `HIF` format. + +If `h` has `Composite Types` either for vertex metadata or hyperedges metadata, +the user has to explicit tell the JSON3 package about it, for instance using: + +`JSON3.StructType(::Type{MyType}) = JSON3.Struct()`. + +See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more details. + +""" +function hg_save(io::IO, h::Hypergraph, format::HIF_Format) + _ = format + + json_hg = Dict{Symbol, Any}() + incidences = [] + v_meta = h.v_meta + he_meta = h.he_meta + + if any(isnothing, h.v_meta) + v_meta = [i for i in 1:length(h.v_meta)] + end + + if any(isnothing, h.he_meta) + he_meta = [i for i in 1:length(h.he_meta)] + end + + node_dict = Dict(i => val for (i, val) in pairs(v_meta)) + edge_dict = Dict(i => val for (i, val) in pairs(he_meta)) + + + types = collect(typeof(h).parameters) + V = types[2] + E = types[3] + + for node_idx in 1:length(v_meta) + for edge_idx in 1:length(he_meta) + node = node_dict[node_idx] + if V == String + node = string(node) + end + + edge = edge_dict[edge_idx] + if E == String + edge = string(edge) + end + + weight = h[node_idx, edge_idx] + + if isnothing(weight) + continue + end + + push!(incidences, Dict( + "edge" => edge, + "node" => node, + "weight" => weight + )) + end + end + + json_hg[:incidences] = incidences + + JSON3.write(io, json_hg) +end + + +""" + hg_load( + io::IO, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, + V::Union{Type{String}, Type{Int}} = String, + E::Union{Type{String}, Type{Int}} = String + ) where {U <: Real} + +Loads a hypergraph from a stream `io` from `HIF` format. 
+More info: https://github.com/pszufe/HIF-standard + +**Arguments** + +* `T` : type of weight values stored in the hypergraph's adjacency matrix +* `D` : dictionary for storing values the default is `Dict{Int, T}` +* `V` : type of values stored in the vertices of the hypergraph +* `E` : type of values stored in the edges of the hypergraph + +""" +function hg_load( + io::IO, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, + V::Union{Type{String}, Type{Int}} = String, + E::Union{Type{String}, Type{Int}} = String + ) where {U <: Real} + _ = format + + data = JSON3.read(read(io, String)) + + nodes = get(data, "nodes", []) + edges = get(data, "edges", []) + + if length(nodes) == 0 || length(edges) == 0 + node_set = Set{V}() + edge_set = Set{E}() + + for inc in data.incidences + if inc.node ∉ node_set + push!(node_set, inc.node) + push!(nodes, inc.node) + end + + if inc.edge ∉ edge_set + push!(edge_set, inc.edge) + push!(edges, inc.edge) + end + end + else + nodes = [node.node for node in nodes] + edges = [edge.edge for edge in edges] + end + + sort!(nodes) + sort!(edges) + + node_dict = Dict(val => i for (i, val) in pairs(nodes)) + edge_dict = Dict(val => i for (i, val) in pairs(edges)) + + n = length(nodes) + k = length(edges) + + h = Hypergraph{T, V, E, D}(n, k, nodes, edges) + + for inc in data.incidences + node_idx = node_dict[inc.node] + he_idx = edge_dict[inc.edge] + + h[node_idx, he_idx] = inc.weight + end + + h +end + + +""" + hg_load( + fname::AbstractString; + format::Abstract_HG_format = HIF_Format(), + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, + V::Union{Type{String}, Type{Int}} = String, + E::Union{Type{String}, Type{Int}} = String + ) where {U <: Real} + ) + +Loads a hypergraph from a file `fname`. +The default saving format is `json`. + +**Arguments** + +* `T` : type of weight values stored in the hypergraph's adjacency matrix +* `D` : dictionary for storing values the default is `Dict{Int, T}` +* `V` : type of values stored in the vertices of the hypergraph +* `E` : type of values stored in the edges of the hypergraph + +""" +function hg_load( + fname::AbstractString, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, + V::Union{Type{String}, Type{Int}} = String, + E::Union{Type{String}, Type{Int}} = String + ) where {U <: Real} + open(io -> hg_load(io, format; T=T, D=D, V=V, E=E), fname, "r") +end diff --git a/src/io.jl b/src/io.jl index 5e4fbd2..474fbde 100644 --- a/src/io.jl +++ b/src/io.jl @@ -3,8 +3,6 @@ abstract type Abstract_HG_format end struct HGF_Format <: Abstract_HG_format end struct JSON_Format <: Abstract_HG_format end -struct HIF_Format <: Abstract_HG_format end - """ hg_save(io::IO, h::Hypergraph, format::HGF_Format) @@ -75,75 +73,6 @@ hg_save( open(io -> hg_save(io, h, format), fname, "w") -""" - hg_save(io::IO, h::Hypergraph, format::HIF_Format) - -Saves a hypergraph `h` to an output stream `io` in `HIF` format. - -If `h` has `Composite Types` either for vertex metadata or hyperedges metadata, -the user has to explicit tell the JSON3 package about it, for instance using: - -`JSON3.StructType(::Type{MyType}) = JSON3.Struct()`. - -See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more details. 
- -""" -function hg_save(io::IO, h::Hypergraph, format::HIF_Format) - _ = format - - json_hg = Dict{Symbol, Any}() - incidences = [] - v_meta = h.v_meta - he_meta = h.he_meta - - if any(isnothing, h.v_meta) - v_meta = [i for i in 1:length(h.v_meta)] - end - - if any(isnothing, h.he_meta) - he_meta = [i for i in 1:length(h.he_meta)] - end - - node_dict = Dict(i => val for (i, val) in pairs(v_meta)) - edge_dict = Dict(i => val for (i, val) in pairs(he_meta)) - - - types = collect(typeof(h).parameters) - V = types[2] - E = types[3] - - for node_idx in 1:length(v_meta) - for edge_idx in 1:length(he_meta) - node = node_dict[node_idx] - if V == String - node = string(node) - end - - edge = edge_dict[edge_idx] - if E == String - edge = string(edge) - end - - weight = h[node_idx, edge_idx] - - if isnothing(weight) - continue - end - - push!(incidences, Dict( - "edge" => edge, - "node" => node, - "weight" => weight - )) - end - end - - json_hg[:incidences] = incidences - - JSON3.write(io, json_hg) -end - - """ hg_load( io::IO, @@ -297,116 +226,3 @@ hg_load( V = Nothing, E = Nothing) where {U <: Real} = open(io -> hg_load(io, format; T=T, D=D, V=V, E=E), fname, "r") - - - -""" - hg_load( - io::IO, - format::HIF_Format; - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, - V::Union{Type{String}, Type{Int}} = String, - E::Union{Type{String}, Type{Int}} = String - ) where {U <: Real} - -Loads a hypergraph from a stream `io` from `HIF` format. -More info: https://github.com/pszufe/HIF-standard - -**Arguments** - -* `T` : type of weight values stored in the hypergraph's adjacency matrix -* `D` : dictionary for storing values the default is `Dict{Int, T}` -* `V` : type of values stored in the vertices of the hypergraph -* `E` : type of values stored in the edges of the hypergraph - -""" -function hg_load( - io::IO, - format::HIF_Format; - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V::Union{Type{String}, Type{Int}} = String, - E::Union{Type{String}, Type{Int}} = String - ) where {U <: Real} - _ = format - - data = JSON3.read(read(io, String)) - - nodes = get(data, "nodes", []) - edges = get(data, "edges", []) - - if length(nodes) == 0 || length(edges) == 0 - node_set = Set{V}() - edge_set = Set{E}() - - for inc in data.incidences - if inc.node ∉ node_set - push!(node_set, inc.node) - push!(nodes, inc.node) - end - - if inc.edge ∉ edge_set - push!(edge_set, inc.edge) - push!(edges, inc.edge) - end - end - else - nodes = [node.node for node in nodes] - edges = [edge.edge for edge in edges] - end - - sort!(nodes) - sort!(edges) - - node_dict = Dict(val => i for (i, val) in pairs(nodes)) - edge_dict = Dict(val => i for (i, val) in pairs(edges)) - - n = length(nodes) - k = length(edges) - - h = Hypergraph{T, V, E, D}(n, k, nodes, edges) - - for inc in data.incidences - node_idx = node_dict[inc.node] - he_idx = edge_dict[inc.edge] - - h[node_idx, he_idx] = inc.weight - end - - h -end - - -""" - hg_load( - fname::AbstractString; - format::Abstract_HG_format = HIF_Format(), - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, - V::Union{Type{String}, Type{Int}} = String, - E::Union{Type{String}, Type{Int}} = String - ) where {U <: Real} - ) - -Loads a hypergraph from a file `fname`. -The default saving format is `json`. 
- -**Arguments** - -* `T` : type of weight values stored in the hypergraph's adjacency matrix -* `D` : dictionary for storing values the default is `Dict{Int, T}` -* `V` : type of values stored in the vertices of the hypergraph -* `E` : type of values stored in the edges of the hypergraph - -""" -function hg_load( - fname::AbstractString, - format::HIF_Format; - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V::Union{Type{String}, Type{Int}} = String, - E::Union{Type{String}, Type{Int}} = String - ) where {U <: Real} - open(io -> hg_load(io, format; T=T, D=D, V=V, E=E), fname, "r") -end From cf48d5215d9caadae686a3bcecf3b33dae42ca08 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Thu, 20 Mar 2025 14:45:49 +0100 Subject: [PATCH 07/23] even out the diff --- src/hif_io.jl | 40 ++++++++++++++++++---------------------- src/io.jl | 1 + 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/hif_io.jl b/src/hif_io.jl index 514470e..0b2555b 100644 --- a/src/hif_io.jl +++ b/src/hif_io.jl @@ -17,17 +17,17 @@ See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more de function hg_save(io::IO, h::Hypergraph, format::HIF_Format) _ = format - json_hg = Dict{Symbol, Any}() + json_hg = Dict{Symbol,Any}() incidences = [] v_meta = h.v_meta he_meta = h.he_meta if any(isnothing, h.v_meta) - v_meta = [i for i in 1:length(h.v_meta)] + v_meta = [i for i = 1:length(h.v_meta)] end if any(isnothing, h.he_meta) - he_meta = [i for i in 1:length(h.he_meta)] + he_meta = [i for i = 1:length(h.he_meta)] end node_dict = Dict(i => val for (i, val) in pairs(v_meta)) @@ -38,8 +38,8 @@ function hg_save(io::IO, h::Hypergraph, format::HIF_Format) V = types[2] E = types[3] - for node_idx in 1:length(v_meta) - for edge_idx in 1:length(he_meta) + for node_idx = 1:length(v_meta) + for edge_idx = 1:length(he_meta) node = node_dict[node_idx] if V == String node = string(node) @@ -56,16 +56,12 @@ function hg_save(io::IO, h::Hypergraph, format::HIF_Format) continue end - push!(incidences, Dict( - "edge" => edge, - "node" => node, - "weight" => weight - )) + push!(incidences, Dict("edge" => edge, "node" => node, "weight" => weight)) end end json_hg[:incidences] = incidences - + JSON3.write(io, json_hg) end @@ -95,10 +91,10 @@ function hg_load( io::IO, format::HIF_Format; T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V::Union{Type{String}, Type{Int}} = String, - E::Union{Type{String}, Type{Int}} = String - ) where {U <: Real} + D::Type{<:AbstractDict{Int,U}} = Dict{Int,T}, + V::Union{Type{String},Type{Int}} = String, + E::Union{Type{String},Type{Int}} = String, +) where {U<:Real} _ = format data = JSON3.read(read(io, String)) @@ -123,7 +119,7 @@ function hg_load( end else nodes = [node.node for node in nodes] - edges = [edge.edge for edge in edges] + edges = [edge.edge for edge in edges] end sort!(nodes) @@ -135,7 +131,7 @@ function hg_load( n = length(nodes) k = length(edges) - h = Hypergraph{T, V, E, D}(n, k, nodes, edges) + h = Hypergraph{T,V,E,D}(n, k, nodes, edges) for inc in data.incidences node_idx = node_dict[inc.node] @@ -174,9 +170,9 @@ function hg_load( fname::AbstractString, format::HIF_Format; T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V::Union{Type{String}, Type{Int}} = String, - E::Union{Type{String}, Type{Int}} = String - ) where {U <: Real} - open(io -> hg_load(io, format; T=T, D=D, V=V, E=E), fname, "r") + D::Type{<:AbstractDict{Int,U}} = Dict{Int,T}, + V::Union{Type{String},Type{Int}} = String, + 
E::Union{Type{String},Type{Int}} = String, +) where {U<:Real} + open(io -> hg_load(io, format; T = T, D = D, V = V, E = E), fname, "r") end diff --git a/src/io.jl b/src/io.jl index 474fbde..25afa9e 100644 --- a/src/io.jl +++ b/src/io.jl @@ -4,6 +4,7 @@ abstract type Abstract_HG_format end struct HGF_Format <: Abstract_HG_format end struct JSON_Format <: Abstract_HG_format end + """ hg_save(io::IO, h::Hypergraph, format::HGF_Format) From fa7f5261c69e2092ebaa13b8fe008406983d1e2d Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Thu, 20 Mar 2025 16:10:44 +0100 Subject: [PATCH 08/23] code review 1 --- src/SimpleHypergraphs.jl | 1 + src/hif_io.jl | 46 ++++++++++++++-------------------------- src/utils.jl | 8 +++++++ 3 files changed, 25 insertions(+), 30 deletions(-) create mode 100644 src/utils.jl diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl index 0ad146a..74ca1f9 100644 --- a/src/SimpleHypergraphs.jl +++ b/src/SimpleHypergraphs.jl @@ -74,6 +74,7 @@ end include("hypergraph.jl") include("io.jl") include("hif_io.jl") +include("utils.jl") include("models/bipartite.jl") include("models/twosection.jl") diff --git a/src/hif_io.jl b/src/hif_io.jl index 0b2555b..d5d0c7e 100644 --- a/src/hif_io.jl +++ b/src/hif_io.jl @@ -14,48 +14,34 @@ the user has to explicit tell the JSON3 package about it, for instance using: See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more details. """ -function hg_save(io::IO, h::Hypergraph, format::HIF_Format) +function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T, V, E, D} _ = format json_hg = Dict{Symbol,Any}() - incidences = [] - v_meta = h.v_meta - he_meta = h.he_meta + incidences = Vector{Dict{String, Union{String, Number}}}() + v_meta = Vector{Union{String, Int}}() + he_meta = Vector{Union{String, Int}}() if any(isnothing, h.v_meta) v_meta = [i for i = 1:length(h.v_meta)] + else + v_meta = [v for v in h.v_meta] end if any(isnothing, h.he_meta) he_meta = [i for i = 1:length(h.he_meta)] + else + he_meta = [he for he in h.he_meta] end node_dict = Dict(i => val for (i, val) in pairs(v_meta)) - edge_dict = Dict(i => val for (i, val) in pairs(he_meta)) - - - types = collect(typeof(h).parameters) - V = types[2] - E = types[3] - - for node_idx = 1:length(v_meta) - for edge_idx = 1:length(he_meta) - node = node_dict[node_idx] - if V == String - node = string(node) - end - - edge = edge_dict[edge_idx] - if E == String - edge = string(edge) - end - - weight = h[node_idx, edge_idx] - - if isnothing(weight) - continue - end + for node_idx = eachindex(v_meta) + edges = gethyperedges(h, node_idx) + node = cast_value(node_dict[node_idx], V) + + for (_edge, weight) in edges + edge = cast_value(_edge, E) push!(incidences, Dict("edge" => edge, "node" => node, "weight" => weight)) end end @@ -99,8 +85,8 @@ function hg_load( data = JSON3.read(read(io, String)) - nodes = get(data, "nodes", []) - edges = get(data, "edges", []) + nodes = get(data, "nodes", Vector{Union{String, Int}}()) + edges = get(data, "edges", Vector{Union{String, Int}}()) if length(nodes) == 0 || length(edges) == 0 node_set = Set{V}() diff --git a/src/utils.jl b/src/utils.jl new file mode 100644 index 0000000..13edd8c --- /dev/null +++ b/src/utils.jl @@ -0,0 +1,8 @@ +function cast_value(val::Union{String, Int}, t::Type{String}) + return string(val) +end + + +function cast_value(val::Int, t::Type{Int}) + return val +end From e71787c4709e1f5257866626b2b53d176b85be49 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Thu, 20 Mar 2025 
16:25:27 +0100 Subject: [PATCH 09/23] code review 2 --- src/hif_io.jl | 15 +-------------- src/utils.jl | 13 +++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/hif_io.jl b/src/hif_io.jl index d5d0c7e..56ad18c 100644 --- a/src/hif_io.jl +++ b/src/hif_io.jl @@ -19,20 +19,7 @@ function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T json_hg = Dict{Symbol,Any}() incidences = Vector{Dict{String, Union{String, Number}}}() - v_meta = Vector{Union{String, Int}}() - he_meta = Vector{Union{String, Int}}() - - if any(isnothing, h.v_meta) - v_meta = [i for i = 1:length(h.v_meta)] - else - v_meta = [v for v in h.v_meta] - end - - if any(isnothing, h.he_meta) - he_meta = [i for i = 1:length(h.he_meta)] - else - he_meta = [he for he in h.he_meta] - end + v_meta = handle_metadata(h.v_meta) node_dict = Dict(i => val for (i, val) in pairs(v_meta)) diff --git a/src/utils.jl b/src/utils.jl index 13edd8c..36d6e20 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -6,3 +6,16 @@ end function cast_value(val::Int, t::Type{Int}) return val end + + +function handle_metadata(metadata::Array) + result = Vector{Union{String, Int}}() + + if any(isnothing, metadata) + append!(result, 1:length(metadata)) + else + append!(result, metadata) + end + + return result +end From f069829e70d1d44b326869cdf7d1db5f0ef38f50 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Sun, 23 Mar 2025 18:51:04 +0100 Subject: [PATCH 10/23] restructure + better metadata handling in load --- src/SimpleHypergraphs.jl | 6 +- src/hif/hif.jl | 1 + src/hif/hif_load.jl | 134 ++++++++++++++++ src/hif/hif_save.jl | 60 +++++++ src/hif_io.jl | 151 ------------------ src/utils.jl | 21 --- test/data/HIF-standard/README.md | 6 + .../HIF-standard/duplicated_nodes_edges.json | 7 + test/data/HIF-standard/empty_arrays.json | 7 + test/data/HIF-standard/empty_hypergraph.json | 3 + ...etadata_with_deeply_nested_attributes.json | 15 ++ .../metadata_with_nested_attributes.json | 13 ++ test/data/HIF-standard/missing_direction.json | 5 + test/data/HIF-standard/single_edge.json | 8 + .../HIF-standard/single_edge_with_attrs.json | 12 ++ test/data/HIF-standard/single_incidence.json | 8 + .../single_incidence_with_attrs.json | 12 ++ .../single_incidence_with_weights.json | 9 ++ test/data/HIF-standard/single_node.json | 8 + .../HIF-standard/single_node_with_attrs.json | 13 ++ .../HIF-standard/valid_incidence_head.json | 5 + .../HIF-standard/valid_incidence_tail.json | 5 + 22 files changed, 335 insertions(+), 174 deletions(-) create mode 100644 src/hif/hif.jl create mode 100644 src/hif/hif_load.jl create mode 100644 src/hif/hif_save.jl delete mode 100644 src/hif_io.jl delete mode 100644 src/utils.jl create mode 100644 test/data/HIF-standard/README.md create mode 100644 test/data/HIF-standard/duplicated_nodes_edges.json create mode 100644 test/data/HIF-standard/empty_arrays.json create mode 100644 test/data/HIF-standard/empty_hypergraph.json create mode 100644 test/data/HIF-standard/metadata_with_deeply_nested_attributes.json create mode 100644 test/data/HIF-standard/metadata_with_nested_attributes.json create mode 100644 test/data/HIF-standard/missing_direction.json create mode 100644 test/data/HIF-standard/single_edge.json create mode 100644 test/data/HIF-standard/single_edge_with_attrs.json create mode 100644 test/data/HIF-standard/single_incidence.json create mode 100644 test/data/HIF-standard/single_incidence_with_attrs.json create mode 100644 test/data/HIF-standard/single_incidence_with_weights.json create mode 
100644 test/data/HIF-standard/single_node.json create mode 100644 test/data/HIF-standard/single_node_with_attrs.json create mode 100644 test/data/HIF-standard/valid_incidence_head.json create mode 100644 test/data/HIF-standard/valid_incidence_tail.json diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl index 74ca1f9..7cafdbb 100644 --- a/src/SimpleHypergraphs.jl +++ b/src/SimpleHypergraphs.jl @@ -73,8 +73,10 @@ end include("hypergraph.jl") include("io.jl") -include("hif_io.jl") -include("utils.jl") + +include("hif/hif.jl") +include("hif/hif_load.jl") +include("hif/hif_save.jl") include("models/bipartite.jl") include("models/twosection.jl") diff --git a/src/hif/hif.jl b/src/hif/hif.jl new file mode 100644 index 0000000..4c54f56 --- /dev/null +++ b/src/hif/hif.jl @@ -0,0 +1 @@ +struct HIF_Format <: Abstract_HG_format end diff --git a/src/hif/hif_load.jl b/src/hif/hif_load.jl new file mode 100644 index 0000000..2dda203 --- /dev/null +++ b/src/hif/hif_load.jl @@ -0,0 +1,134 @@ +using JSON3 + +""" + hg_load( + io::IO, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, + ) where {U <: Real} + +Loads a hypergraph from a stream `io` from `HIF` format. +More info: https://github.com/pszufe/HIF-standard + +**Arguments** + +* `T` : type of weight values stored in the hypergraph's adjacency matrix +* `D` : dictionary for storing values the default is `Dict{Int, T}` +""" +function hg_load( + io::IO, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int,U}} = Dict{Int,T}, +) where {U<:Real} + _ = format + + data = JSON3.read(read(io, String)) + + if !haskey(data, "incidences") + throw(ArgumentError("Invalid JSON schema: missing required key 'incidences'")) + end + + nodes, edges = get_nodes_and_edges(data) + + h = init_hypergraph(data, length(nodes), length(edges), T, D) + + add_weights_from_incidences!(h, data.incidences, nodes, edges) + + h +end + + +function init_hypergraph( + data::JSON3.Object, + n::Int64, + k::Int64, + T::Type{U}, + D::Type{<:AbstractDict{Int,U}}, +) where {U<:Real} + node_metadata = Vector{Union{JSON3.Object, Nothing}}() + edge_metadata = Vector{Union{JSON3.Object, Nothing}}() + + if haskey(data, "nodes") + append!(node_metadata, data["nodes"]) + else + append!(node_metadata, [nothing for _ in 1:n]) + end + + if haskey(data, "edges") + append!(edge_metadata, data["edges"]) + else + append!(edge_metadata, [nothing for _ in 1:k]) + end + + return Hypergraph{T,JSON3.Object,JSON3.Object,D}(n, k, node_metadata, edge_metadata) +end + + +function get_nodes_and_edges(data::JSON3.Object) + node_set = Set{Union{String, Int}}() + edge_set = Set{Union{String, Int}}() + + nodes = Vector{Union{String, Int}}() + edges = Vector{Union{String, Int}}() + + for inc in data.incidences + if inc.node ∉ node_set + push!(node_set, inc.node) + push!(nodes, inc.node) + end + + if inc.edge ∉ edge_set + push!(edge_set, inc.edge) + push!(edges, inc.edge) + end + end + + sort!(nodes) + sort!(edges) + + return nodes, edges +end + + +function add_weights_from_incidences!( + h::Hypergraph, + incidences::JSON3.Array{JSON3.Object}, + nodes::Vector{Union{String, Int}}, + edges::Vector{Union{String, Int}} + ) + node_dict = Dict(val => i for (i, val) in pairs(nodes)) + edge_dict = Dict(val => i for (i, val) in pairs(edges)) + + for inc in incidences + node_idx = node_dict[inc.node] + he_idx = edge_dict[inc.edge] + + h[node_idx, he_idx] = inc.weight + end +end + + +""" + hg_load( + fname::AbstractString; + format::Abstract_HG_format = 
HIF_Format(), + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, + ) where {U <: Real} + ) +Loads a hypergraph from a file `fname`. +The default saving format is `json`. +**Arguments** +* `T` : type of weight values stored in the hypergraph's adjacency matrix +* `D` : dictionary for storing values the default is `Dict{Int, T}` +""" +function hg_load( + fname::AbstractString, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int,U}} = Dict{Int,T}, +) where {U<:Real} + open(io -> hg_load(io, format; T = T, D = D), fname, "r") +end diff --git a/src/hif/hif_save.jl b/src/hif/hif_save.jl new file mode 100644 index 0000000..a73d385 --- /dev/null +++ b/src/hif/hif_save.jl @@ -0,0 +1,60 @@ +using JSON3 + +""" + hg_save(io::IO, h::Hypergraph, format::HIF_Format) + +Saves a hypergraph `h` to an output stream `io` in `HIF` format. + +If `h` has `Composite Types` either for vertex metadata or hyperedges metadata, +the user has to explicit tell the JSON3 package about it, for instance using: + +`JSON3.StructType(::Type{MyType}) = JSON3.Struct()`. + +See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more details. + +""" +function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T, V, E, D} + _ = format + + json_hg = Dict{Symbol,Any}() + incidences = Vector{Dict{String, Union{String, Number}}}() + v_meta = handle_metadata(h.v_meta) + + node_dict = Dict(i => val for (i, val) in pairs(v_meta)) + + for node_idx = eachindex(v_meta) + edges = gethyperedges(h, node_idx) + node = cast_value(node_dict[node_idx], V) + + for (_edge, weight) in edges + edge = cast_value(_edge, E) + push!(incidences, Dict("edge" => edge, "node" => node, "weight" => weight)) + end + end + + json_hg[:incidences] = incidences + + JSON3.write(io, json_hg) +end + +function cast_value(val::Union{String, Int}, t::Type{String}) + return string(val) +end + + +function cast_value(val::Union{Int, JSON3.Object}, t::Type{Union{Int, JSON3.Object}}) + return val +end + + +function handle_metadata(metadata::Array) + result = Vector{Union{String, Int, JSON3.Object}}() + + if any(isnothing, metadata) + append!(result, 1:length(metadata)) + else + append!(result, metadata) + end + + return result +end diff --git a/src/hif_io.jl b/src/hif_io.jl deleted file mode 100644 index 56ad18c..0000000 --- a/src/hif_io.jl +++ /dev/null @@ -1,151 +0,0 @@ -struct HIF_Format <: Abstract_HG_format end - - -""" - hg_save(io::IO, h::Hypergraph, format::HIF_Format) - -Saves a hypergraph `h` to an output stream `io` in `HIF` format. - -If `h` has `Composite Types` either for vertex metadata or hyperedges metadata, -the user has to explicit tell the JSON3 package about it, for instance using: - -`JSON3.StructType(::Type{MyType}) = JSON3.Struct()`. - -See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more details. 
- -""" -function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T, V, E, D} - _ = format - - json_hg = Dict{Symbol,Any}() - incidences = Vector{Dict{String, Union{String, Number}}}() - v_meta = handle_metadata(h.v_meta) - - node_dict = Dict(i => val for (i, val) in pairs(v_meta)) - - for node_idx = eachindex(v_meta) - edges = gethyperedges(h, node_idx) - node = cast_value(node_dict[node_idx], V) - - for (_edge, weight) in edges - edge = cast_value(_edge, E) - push!(incidences, Dict("edge" => edge, "node" => node, "weight" => weight)) - end - end - - json_hg[:incidences] = incidences - - JSON3.write(io, json_hg) -end - - -""" - hg_load( - io::IO, - format::HIF_Format; - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, - V::Union{Type{String}, Type{Int}} = String, - E::Union{Type{String}, Type{Int}} = String - ) where {U <: Real} - -Loads a hypergraph from a stream `io` from `HIF` format. -More info: https://github.com/pszufe/HIF-standard - -**Arguments** - -* `T` : type of weight values stored in the hypergraph's adjacency matrix -* `D` : dictionary for storing values the default is `Dict{Int, T}` -* `V` : type of values stored in the vertices of the hypergraph -* `E` : type of values stored in the edges of the hypergraph - -""" -function hg_load( - io::IO, - format::HIF_Format; - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int,U}} = Dict{Int,T}, - V::Union{Type{String},Type{Int}} = String, - E::Union{Type{String},Type{Int}} = String, -) where {U<:Real} - _ = format - - data = JSON3.read(read(io, String)) - - nodes = get(data, "nodes", Vector{Union{String, Int}}()) - edges = get(data, "edges", Vector{Union{String, Int}}()) - - if length(nodes) == 0 || length(edges) == 0 - node_set = Set{V}() - edge_set = Set{E}() - - for inc in data.incidences - if inc.node ∉ node_set - push!(node_set, inc.node) - push!(nodes, inc.node) - end - - if inc.edge ∉ edge_set - push!(edge_set, inc.edge) - push!(edges, inc.edge) - end - end - else - nodes = [node.node for node in nodes] - edges = [edge.edge for edge in edges] - end - - sort!(nodes) - sort!(edges) - - node_dict = Dict(val => i for (i, val) in pairs(nodes)) - edge_dict = Dict(val => i for (i, val) in pairs(edges)) - - n = length(nodes) - k = length(edges) - - h = Hypergraph{T,V,E,D}(n, k, nodes, edges) - - for inc in data.incidences - node_idx = node_dict[inc.node] - he_idx = edge_dict[inc.edge] - - h[node_idx, he_idx] = inc.weight - end - - h -end - - -""" - hg_load( - fname::AbstractString; - format::Abstract_HG_format = HIF_Format(), - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, - V::Union{Type{String}, Type{Int}} = String, - E::Union{Type{String}, Type{Int}} = String - ) where {U <: Real} - ) - -Loads a hypergraph from a file `fname`. -The default saving format is `json`. 
- -**Arguments** - -* `T` : type of weight values stored in the hypergraph's adjacency matrix -* `D` : dictionary for storing values the default is `Dict{Int, T}` -* `V` : type of values stored in the vertices of the hypergraph -* `E` : type of values stored in the edges of the hypergraph - -""" -function hg_load( - fname::AbstractString, - format::HIF_Format; - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int,U}} = Dict{Int,T}, - V::Union{Type{String},Type{Int}} = String, - E::Union{Type{String},Type{Int}} = String, -) where {U<:Real} - open(io -> hg_load(io, format; T = T, D = D, V = V, E = E), fname, "r") -end diff --git a/src/utils.jl b/src/utils.jl deleted file mode 100644 index 36d6e20..0000000 --- a/src/utils.jl +++ /dev/null @@ -1,21 +0,0 @@ -function cast_value(val::Union{String, Int}, t::Type{String}) - return string(val) -end - - -function cast_value(val::Int, t::Type{Int}) - return val -end - - -function handle_metadata(metadata::Array) - result = Vector{Union{String, Int}}() - - if any(isnothing, metadata) - append!(result, 1:length(metadata)) - else - append!(result, metadata) - end - - return result -end diff --git a/test/data/HIF-standard/README.md b/test/data/HIF-standard/README.md new file mode 100644 index 0000000..cc3f453 --- /dev/null +++ b/test/data/HIF-standard/README.md @@ -0,0 +1,6 @@ +# HIF-standard + +In this directory you can find files used to test import and export of Hypergraphs in the `HIF` format. + +Read more about `HIF` here: +https://github.com/pszufe/HIF-standard diff --git a/test/data/HIF-standard/duplicated_nodes_edges.json b/test/data/HIF-standard/duplicated_nodes_edges.json new file mode 100644 index 0000000..a02124e --- /dev/null +++ b/test/data/HIF-standard/duplicated_nodes_edges.json @@ -0,0 +1,7 @@ +{ + "network-type": "undirected", + "metadata": {}, + "nodes": [{"node": "n1"}, {"node": "n1"}], + "edges": [{"edge": "e1"}, {"edge": "e1"}], + "incidences": [{"edge": "e1", "node": "n1"}, {"edge": "e1", "node": "n1"}] +} \ No newline at end of file diff --git a/test/data/HIF-standard/empty_arrays.json b/test/data/HIF-standard/empty_arrays.json new file mode 100644 index 0000000..7b0ce2d --- /dev/null +++ b/test/data/HIF-standard/empty_arrays.json @@ -0,0 +1,7 @@ +{ + "network-type": "undirected", + "metadata": {}, + "incidences": [], + "nodes": [], + "edges": [] +} \ No newline at end of file diff --git a/test/data/HIF-standard/empty_hypergraph.json b/test/data/HIF-standard/empty_hypergraph.json new file mode 100644 index 0000000..7a65310 --- /dev/null +++ b/test/data/HIF-standard/empty_hypergraph.json @@ -0,0 +1,3 @@ +{ + "incidences": [] +} \ No newline at end of file diff --git a/test/data/HIF-standard/metadata_with_deeply_nested_attributes.json b/test/data/HIF-standard/metadata_with_deeply_nested_attributes.json new file mode 100644 index 0000000..bd78510 --- /dev/null +++ b/test/data/HIF-standard/metadata_with_deeply_nested_attributes.json @@ -0,0 +1,15 @@ +{ + "network-type": "asc", + "metadata": { + "level1": { + "level2": { + "level3": { + "key": "value" + } + } + } + }, + "incidences": [{"edge": 1, "node": 2}], + "nodes": [{"node": "n1", "attrs": {"nested_attr": {"key1": "value1"}}}], + "edges": [{"edge": "e1", "attrs": {"nested_attr": {"key2": "value2"}}}] +} \ No newline at end of file diff --git a/test/data/HIF-standard/metadata_with_nested_attributes.json b/test/data/HIF-standard/metadata_with_nested_attributes.json new file mode 100644 index 0000000..697be84 --- /dev/null +++ b/test/data/HIF-standard/metadata_with_nested_attributes.json 
@@ -0,0 +1,13 @@ +{ + "network-type": "asc", + "metadata": { + "creator": "nested_test", + "extra_info": { + "key1": "value1", + "key2": "value2" + } + }, + "incidences": [{"edge": 10, "node": 20}], + "nodes": [{"node": 20, "attrs": {"color": "blue", "size": "large"}}], + "edges": [{"edge": 10, "attrs": {"priority": "high"}}] +} \ No newline at end of file diff --git a/test/data/HIF-standard/missing_direction.json b/test/data/HIF-standard/missing_direction.json new file mode 100644 index 0000000..f0a4be6 --- /dev/null +++ b/test/data/HIF-standard/missing_direction.json @@ -0,0 +1,5 @@ +{ + "network-type": "directed", + "metadata": {}, + "incidences": [{"edge": 1, "node": 2}] +} \ No newline at end of file diff --git a/test/data/HIF-standard/single_edge.json b/test/data/HIF-standard/single_edge.json new file mode 100644 index 0000000..6688947 --- /dev/null +++ b/test/data/HIF-standard/single_edge.json @@ -0,0 +1,8 @@ +{ + "incidences": [], + "edges": [ + { + "edge": 3 + } + ] +} \ No newline at end of file diff --git a/test/data/HIF-standard/single_edge_with_attrs.json b/test/data/HIF-standard/single_edge_with_attrs.json new file mode 100644 index 0000000..4e5871e --- /dev/null +++ b/test/data/HIF-standard/single_edge_with_attrs.json @@ -0,0 +1,12 @@ +{ + "incidences": [], + "edges": [ + { + "edge": 3, + "attrs": { + "timestamp": "2020-04-01", + "weight": 2.0 + } + } + ] +} \ No newline at end of file diff --git a/test/data/HIF-standard/single_incidence.json b/test/data/HIF-standard/single_incidence.json new file mode 100644 index 0000000..1453273 --- /dev/null +++ b/test/data/HIF-standard/single_incidence.json @@ -0,0 +1,8 @@ +{ + "incidences": [ + { + "edge": "abcd", + "node": 42 + } + ] +} \ No newline at end of file diff --git a/test/data/HIF-standard/single_incidence_with_attrs.json b/test/data/HIF-standard/single_incidence_with_attrs.json new file mode 100644 index 0000000..5d5dece --- /dev/null +++ b/test/data/HIF-standard/single_incidence_with_attrs.json @@ -0,0 +1,12 @@ +{ + "incidences": [ + { + "edge": "abcd", + "node": 42, + "attrs": { + "role": "PI", + "age": 42 + } + } + ] +} \ No newline at end of file diff --git a/test/data/HIF-standard/single_incidence_with_weights.json b/test/data/HIF-standard/single_incidence_with_weights.json new file mode 100644 index 0000000..52cb6a4 --- /dev/null +++ b/test/data/HIF-standard/single_incidence_with_weights.json @@ -0,0 +1,9 @@ +{ + "incidences": [ + { + "edge": "abcd", + "node": 42, + "weight": -2 + } + ] +} \ No newline at end of file diff --git a/test/data/HIF-standard/single_node.json b/test/data/HIF-standard/single_node.json new file mode 100644 index 0000000..54987b2 --- /dev/null +++ b/test/data/HIF-standard/single_node.json @@ -0,0 +1,8 @@ +{ + "incidences": [], + "nodes": [ + { + "node": 42 + } + ] +} \ No newline at end of file diff --git a/test/data/HIF-standard/single_node_with_attrs.json b/test/data/HIF-standard/single_node_with_attrs.json new file mode 100644 index 0000000..efb03ef --- /dev/null +++ b/test/data/HIF-standard/single_node_with_attrs.json @@ -0,0 +1,13 @@ +{ + "incidences": [], + "nodes": [ + { + "node": 42, + "attrs": { + "weight": 2, + "color": "blue", + "online": true + } + } + ] +} \ No newline at end of file diff --git a/test/data/HIF-standard/valid_incidence_head.json b/test/data/HIF-standard/valid_incidence_head.json new file mode 100644 index 0000000..ed554cc --- /dev/null +++ b/test/data/HIF-standard/valid_incidence_head.json @@ -0,0 +1,5 @@ +{ + "network-type": "directed", + "metadata": {}, + 
"incidences": [{"edge": 1, "node": 2, "direction": "head"}] +} \ No newline at end of file diff --git a/test/data/HIF-standard/valid_incidence_tail.json b/test/data/HIF-standard/valid_incidence_tail.json new file mode 100644 index 0000000..3aa3bb2 --- /dev/null +++ b/test/data/HIF-standard/valid_incidence_tail.json @@ -0,0 +1,5 @@ +{ + "network-type": "directed", + "metadata": {}, + "incidences": [{"edge": 1, "node": 2, "direction": "tail"}] +} \ No newline at end of file From 8fa099d15ec4c9cc79770f5ba2427a9fa3736b1b Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Mon, 24 Mar 2025 13:07:18 +0100 Subject: [PATCH 11/23] improve saving --- src/hif/hif_save.jl | 130 +++++++++++++++++++++++++++++++++++++------- test/runtests.jl | 2 +- 2 files changed, 110 insertions(+), 22 deletions(-) diff --git a/src/hif/hif_save.jl b/src/hif/hif_save.jl index a73d385..346deed 100644 --- a/src/hif/hif_save.jl +++ b/src/hif/hif_save.jl @@ -1,5 +1,7 @@ using JSON3 +HIFEntryType = Dict{String, Union{String, Number, JSON3.Object}} + """ hg_save(io::IO, h::Hypergraph, format::HIF_Format) @@ -17,44 +19,130 @@ function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T _ = format json_hg = Dict{Symbol,Any}() - incidences = Vector{Dict{String, Union{String, Number}}}() - v_meta = handle_metadata(h.v_meta) - node_dict = Dict(i => val for (i, val) in pairs(v_meta)) + nodes_meta = prepare_metadata(h.v_meta, handle_node) + edges_meta = prepare_metadata(h.he_meta, handle_edge) + + incidences = prepare_incidences(h) + + json_hg[:incidences] = incidences + + if !isempty(nodes_meta) + json_hg[:nodes] = nodes_meta + end + + if !isempty(edges_meta) + json_hg[:edges] = edges_meta + end - for node_idx = eachindex(v_meta) + JSON3.write(io, json_hg) +end + + +function prepare_incidences(h::Hypergraph{T, V, E, D}) where {T, V, E, D} + incidences = Vector{HIFEntryType}() + + node_dict = Dict(i => val for (i, val) in pairs(h.v_meta)) + edge_dict = Dict(i => val for (i, val) in pairs(h.he_meta)) + + for node_idx = eachindex(h.v_meta) edges = gethyperedges(h, node_idx) - node = cast_value(node_dict[node_idx], V) + node = isnothing(node_dict[node_idx]) ? node_idx : node_dict[node_idx] + + _node = (V == JSON3.Object) ? node["node"] : node + - for (_edge, weight) in edges - edge = cast_value(_edge, E) - push!(incidences, Dict("edge" => edge, "node" => node, "weight" => weight)) + for (edge, weight) in edges + if isnothing(weight) + continue + end + + _edge = isnothing(edge_dict[edge]) ? 
edge : edge_dict[edge] + + push!(incidences, Dict("edge" => _edge, "node" => _node, "weight" => weight)) end end - json_hg[:incidences] = incidences + return incidences +end - JSON3.write(io, json_hg) + +function prepare_metadata( + metadata::Vector{Union{T, Nothing}}, + handling_func::Function +) where {T} + result = Vector{HIFEntryType}() + + for item in metadata + if isnothing(item) + continue + end + + handled = handling_func(item) + push!(result, handled) + end + + return result end -function cast_value(val::Union{String, Int}, t::Type{String}) - return string(val) + +function handle_node(node::Union{String, Int}) + return Dict{String, Union{String, Int}}( + "node" => node + ) end +function handle_node(node::JSON3.Object) + result = HIFEntryType( + "node" => node["node"] + ) -function cast_value(val::Union{Int, JSON3.Object}, t::Type{Union{Int, JSON3.Object}}) - return val + add_optional_params!(result, node) + + return result end -function handle_metadata(metadata::Array) - result = Vector{Union{String, Int, JSON3.Object}}() +function handle_edge(edge::Union{String, Int}) + return Dict{String, Union{String, Int}}( + "edge" => edge + ) +end - if any(isnothing, metadata) - append!(result, 1:length(metadata)) - else - append!(result, metadata) - end + +function handle_edge(edge::JSON3.Object) + result = HIFEntryType( + "edge" => edge["edge"] + ) + + add_optional_params!(result, edge) return result end + + +function add_optional_params!(result::HIFEntryType, item::JSON3.Object) + if haskey(item, "weight") + result["weight"] = item["weight"] + end + + if haskey(item, "attrs") + result["attrs"] = item["attrs"] + end +end + + +function cast_value(val::Union{String, Int}, t::Type{String}) + return string(val) +end + + +function cast_value(val::Int, t::Type{Int}) + return val +end + + +function cast_value(val::JSON3.Object, t::Type{JSON3.Object}) + return val +end + diff --git a/test/runtests.jl b/test/runtests.jl index a1b8591..86b2e64 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -74,7 +74,7 @@ h1[5,2] = 6.5 @test get_hyperedge_meta(h1, 2) == get_hyperedge_meta(loaded_hg, 2) hg_save(path, h1, format=HIF_Format()) - loaded_hg = hg_load(path; format=HIF_Format(), T=Float64, V=Int, E=String) + loaded_hg = hg_load(path, HIF_Format(), T=Float64) @test h1 == loaded_hg end From 94ce937280e579f8ec8663d2edd43bd6a75588c5 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Mon, 31 Mar 2025 21:14:14 +0200 Subject: [PATCH 12/23] improve loading --- src/hif/hif_load.jl | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/hif/hif_load.jl b/src/hif/hif_load.jl index 2dda203..ab67dd3 100644 --- a/src/hif/hif_load.jl +++ b/src/hif/hif_load.jl @@ -24,7 +24,7 @@ function hg_load( ) where {U<:Real} _ = format - data = JSON3.read(read(io, String)) + data = JSON3.read(read(io, String), Dict{String, Any}) if !haskey(data, "incidences") throw(ArgumentError("Invalid JSON schema: missing required key 'incidences'")) @@ -34,21 +34,21 @@ function hg_load( h = init_hypergraph(data, length(nodes), length(edges), T, D) - add_weights_from_incidences!(h, data.incidences, nodes, edges) + add_weights_from_incidences!(h, data["incidences"], nodes, edges) h end function init_hypergraph( - data::JSON3.Object, + data::Dict{String, Any}, n::Int64, k::Int64, T::Type{U}, D::Type{<:AbstractDict{Int,U}}, ) where {U<:Real} - node_metadata = Vector{Union{JSON3.Object, Nothing}}() - edge_metadata = Vector{Union{JSON3.Object, Nothing}}() + node_metadata = 
Vector{Union{Dict{String, Any}, Nothing}}() + edge_metadata = Vector{Union{Dict{String, Any}, Nothing}}() if haskey(data, "nodes") append!(node_metadata, data["nodes"]) @@ -62,39 +62,36 @@ function init_hypergraph( append!(edge_metadata, [nothing for _ in 1:k]) end - return Hypergraph{T,JSON3.Object,JSON3.Object,D}(n, k, node_metadata, edge_metadata) + return Hypergraph{T,Dict{String, Any},Dict{String, Any},D}(n, k, node_metadata, edge_metadata) end -function get_nodes_and_edges(data::JSON3.Object) +function get_nodes_and_edges(data::Dict{String, Any}) node_set = Set{Union{String, Int}}() edge_set = Set{Union{String, Int}}() nodes = Vector{Union{String, Int}}() edges = Vector{Union{String, Int}}() - for inc in data.incidences - if inc.node ∉ node_set - push!(node_set, inc.node) - push!(nodes, inc.node) + for inc in data["incidences"] + if inc["node"] ∉ node_set + push!(node_set, inc["node"]) + push!(nodes, inc["node"]) end - if inc.edge ∉ edge_set - push!(edge_set, inc.edge) - push!(edges, inc.edge) + if inc["edge"] ∉ edge_set + push!(edge_set, inc["edge"]) + push!(edges, inc["edge"]) end end - sort!(nodes) - sort!(edges) - return nodes, edges end function add_weights_from_incidences!( h::Hypergraph, - incidences::JSON3.Array{JSON3.Object}, + incidences::AbstractVector, nodes::Vector{Union{String, Int}}, edges::Vector{Union{String, Int}} ) @@ -102,10 +99,10 @@ function add_weights_from_incidences!( edge_dict = Dict(val => i for (i, val) in pairs(edges)) for inc in incidences - node_idx = node_dict[inc.node] - he_idx = edge_dict[inc.edge] + node_idx = node_dict[inc["node"]] + he_idx = edge_dict[inc["edge"]] - h[node_idx, he_idx] = inc.weight + h[node_idx, he_idx] = inc["weight"] end end From c4c8be242f73ad6dc5084821bf694eb1fe610006 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Sun, 6 Apr 2025 12:26:07 +0200 Subject: [PATCH 13/23] update --- src/hif/hif_load.jl | 88 ++++++++++++++++++++++++++++++--------------- src/hif/hif_save.jl | 46 +++++++----------------- test/runtests.jl | 16 +++++++-- 3 files changed, 86 insertions(+), 64 deletions(-) diff --git a/src/hif/hif_load.jl b/src/hif/hif_load.jl index ab67dd3..ba84644 100644 --- a/src/hif/hif_load.jl +++ b/src/hif/hif_load.jl @@ -20,19 +20,19 @@ function hg_load( io::IO, format::HIF_Format; T::Type{U} = Bool, - D::Type{<:AbstractDict{Int,U}} = Dict{Int,T}, + D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, + V = Nothing, + E = Nothing ) where {U<:Real} - _ = format - data = JSON3.read(read(io, String), Dict{String, Any}) if !haskey(data, "incidences") throw(ArgumentError("Invalid JSON schema: missing required key 'incidences'")) end - nodes, edges = get_nodes_and_edges(data) + nodes, edges = get_nodes_and_edges(data, V, E) - h = init_hypergraph(data, length(nodes), length(edges), T, D) + h = init_hypergraph(data, nodes, edges, T, D, V, E) add_weights_from_incidences!(h, data["incidences"], nodes, edges) @@ -42,31 +42,51 @@ end function init_hypergraph( data::Dict{String, Any}, - n::Int64, - k::Int64, + nodes::AbstractVector{Union{String, Int}}, + edges::AbstractVector{Union{String, Int}}, T::Type{U}, D::Type{<:AbstractDict{Int,U}}, + V = Nothing, + E = Nothing ) where {U<:Real} - node_metadata = Vector{Union{Dict{String, Any}, Nothing}}() - edge_metadata = Vector{Union{Dict{String, Any}, Nothing}}() + node_metadata = Vector{Union{V, Nothing}}([nothing for _ in 1:length(nodes)]) + edge_metadata = Vector{Union{E, Nothing}}([nothing for _ in 1:length(edges)]) if haskey(data, "nodes") - append!(node_metadata, data["nodes"]) - else - 
append!(node_metadata, [nothing for _ in 1:n]) + tmp = [node_obj["node"] for node_obj in data["nodes"]] + s_tmp = Set{V}() + + for (i, node) in pairs(tmp) + if node in s_tmp + continue + end + + node_metadata[i] = node + push!(s_tmp, node) + end end if haskey(data, "edges") - append!(edge_metadata, data["edges"]) + tmp = [edge_obj["edge"] for edge_obj in data["edges"]] + s_tmp = Set{E}() + + for (i, edge) in pairs(tmp) + if edge in s_tmp + continue + end + + edge_metadata[i] = edge + push!(s_tmp, edge) + end else - append!(edge_metadata, [nothing for _ in 1:k]) + append!(edge_metadata, [nothing for _ in 1:length(edges)]) end - return Hypergraph{T,Dict{String, Any},Dict{String, Any},D}(n, k, node_metadata, edge_metadata) + return Hypergraph{T,V,E,D}(length(nodes), length(edges), node_metadata, edge_metadata) end -function get_nodes_and_edges(data::Dict{String, Any}) +function get_nodes_and_edges(data::Dict{String, Any}, V, E) node_set = Set{Union{String, Int}}() edge_set = Set{Union{String, Int}}() @@ -74,35 +94,43 @@ function get_nodes_and_edges(data::Dict{String, Any}) edges = Vector{Union{String, Int}}() for inc in data["incidences"] - if inc["node"] ∉ node_set - push!(node_set, inc["node"]) - push!(nodes, inc["node"]) + node = (V == String) ? string(inc["node"]) : inc["node"] + edge = (E == String) ? string(inc["edge"]) : inc["edge"] + + if node ∉ node_set + push!(node_set, node) + push!(nodes, node) end - if inc["edge"] ∉ edge_set - push!(edge_set, inc["edge"]) - push!(edges, inc["edge"]) + if edge ∉ edge_set + push!(edge_set, edge) + push!(edges, edge) end end + sort!(nodes) + sort!(edges) + return nodes, edges end function add_weights_from_incidences!( - h::Hypergraph, + h::Hypergraph{T,V,E,D}, incidences::AbstractVector, nodes::Vector{Union{String, Int}}, edges::Vector{Union{String, Int}} - ) + ) where {T, V, E, D} node_dict = Dict(val => i for (i, val) in pairs(nodes)) edge_dict = Dict(val => i for (i, val) in pairs(edges)) for inc in incidences - node_idx = node_dict[inc["node"]] - he_idx = edge_dict[inc["edge"]] + node = (V == String) ? string(inc["node"]) : inc["node"] + edge = (E == String) ? string(inc["edge"]) : inc["edge"] + node_idx = node_dict[node] + he_idx = edge_dict[edge] - h[node_idx, he_idx] = inc["weight"] + h[node_idx, he_idx] = haskey(inc, "weight") ? inc["weight"] : 1 end end @@ -125,7 +153,9 @@ function hg_load( fname::AbstractString, format::HIF_Format; T::Type{U} = Bool, - D::Type{<:AbstractDict{Int,U}} = Dict{Int,T}, + D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, + V = Nothing, + E = Nothing ) where {U<:Real} - open(io -> hg_load(io, format; T = T, D = D), fname, "r") + open(io -> hg_load(io, format; T = T, D = D, V = V, E = E), fname, "r") end diff --git a/src/hif/hif_save.jl b/src/hif/hif_save.jl index 346deed..2382dd0 100644 --- a/src/hif/hif_save.jl +++ b/src/hif/hif_save.jl @@ -1,6 +1,6 @@ using JSON3 -HIFEntryType = Dict{String, Union{String, Number, JSON3.Object}} +HIFEntryType = Dict{String, Union{String, Number, Dict{String, Any}}} """ hg_save(io::IO, h::Hypergraph, format::HIF_Format) @@ -42,24 +42,20 @@ end function prepare_incidences(h::Hypergraph{T, V, E, D}) where {T, V, E, D} incidences = Vector{HIFEntryType}() - node_dict = Dict(i => val for (i, val) in pairs(h.v_meta)) - edge_dict = Dict(i => val for (i, val) in pairs(h.he_meta)) - - for node_idx = eachindex(h.v_meta) + for node_idx in eachindex(h.v_meta) edges = gethyperedges(h, node_idx) - node = isnothing(node_dict[node_idx]) ? 
node_idx : node_dict[node_idx] - _node = (V == JSON3.Object) ? node["node"] : node + node = isnothing(h.v_meta[node_idx]) ? node_idx : h.v_meta[node_idx] + + _node = (V == Dict{String, Any}) ? node["node"] : node - for (edge, weight) in edges - if isnothing(weight) - continue + _edge = isnothing(h.he_meta[edge]) ? edge : h.he_meta[edge] + if T == Bool + push!(incidences, Dict("edge" => _edge, "node" => _node)) + else + push!(incidences, Dict("edge" => _edge, "node" => _node, "weight" => weight)) end - - _edge = isnothing(edge_dict[edge]) ? edge : edge_dict[edge] - - push!(incidences, Dict("edge" => _edge, "node" => _node, "weight" => weight)) end end @@ -92,7 +88,7 @@ function handle_node(node::Union{String, Int}) ) end -function handle_node(node::JSON3.Object) +function handle_node(node::Dict{String, Any}) result = HIFEntryType( "node" => node["node"] ) @@ -110,7 +106,7 @@ function handle_edge(edge::Union{String, Int}) end -function handle_edge(edge::JSON3.Object) +function handle_edge(edge::Dict{String, Any}) result = HIFEntryType( "edge" => edge["edge"] ) @@ -121,7 +117,7 @@ function handle_edge(edge::JSON3.Object) end -function add_optional_params!(result::HIFEntryType, item::JSON3.Object) +function add_optional_params!(result::HIFEntryType, item::Dict{String, Any}) if haskey(item, "weight") result["weight"] = item["weight"] end @@ -130,19 +126,3 @@ function add_optional_params!(result::HIFEntryType, item::JSON3.Object) result["attrs"] = item["attrs"] end end - - -function cast_value(val::Union{String, Int}, t::Type{String}) - return string(val) -end - - -function cast_value(val::Int, t::Type{Int}) - return val -end - - -function cast_value(val::JSON3.Object, t::Type{JSON3.Object}) - return val -end - diff --git a/test/runtests.jl b/test/runtests.jl index 86b2e64..7ac9298 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -73,10 +73,12 @@ h1[5,2] = 6.5 @test get_vertex_meta(h1, 1) == get_vertex_meta(loaded_hg, 1) @test get_hyperedge_meta(h1, 2) == get_hyperedge_meta(loaded_hg, 2) - hg_save(path, h1, format=HIF_Format()) - loaded_hg = hg_load(path, HIF_Format(), T=Float64) + hg_save("test.json", h1, format=HIF_Format()) + loaded_hg = hg_load("test.json", HIF_Format(), T=Float64, V=Int, E=String) @test h1 == loaded_hg + @test h1.v_meta == loaded_hg.v_meta + @test h1.he_meta == loaded_hg.he_meta end @test_throws ArgumentError hg_load("data/test_malformedcomment.hgf"; T=Int) @@ -161,6 +163,16 @@ h1[5,2] = 6.5 end; +@testset "SimpleHypergraphs HIF Format " begin + data_dir = "data/HIF-standard/" + + files = [file for file in readdir(data_dir) if split(file, ".")[end] == "json"] + + + +end + + @testset "SimpleHypergraphs BipartiteView " begin h2 = deepcopy(h1) From ae33a1e6e33532dd91f447f2ddf7612f16142f1c Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Sun, 6 Apr 2025 13:45:47 +0200 Subject: [PATCH 14/23] update --- src/hif/hif_load.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/hif/hif_load.jl b/src/hif/hif_load.jl index ba84644..fa6e031 100644 --- a/src/hif/hif_load.jl +++ b/src/hif/hif_load.jl @@ -49,8 +49,11 @@ function init_hypergraph( V = Nothing, E = Nothing ) where {U<:Real} - node_metadata = Vector{Union{V, Nothing}}([nothing for _ in 1:length(nodes)]) - edge_metadata = Vector{Union{E, Nothing}}([nothing for _ in 1:length(edges)]) + n = haskey(data, "nodes") ? max(length(nodes), length(data["nodes"])) : length(nodes) + k = haskey(data, "edges") ? 
max(length(edges), length(data["edges"])) : length(edges) + + node_metadata = Vector{Union{V, Nothing}}([nothing for _ in 1:n]) + edge_metadata = Vector{Union{E, Nothing}}([nothing for _ in 1:k]) if haskey(data, "nodes") tmp = [node_obj["node"] for node_obj in data["nodes"]] @@ -78,11 +81,9 @@ function init_hypergraph( edge_metadata[i] = edge push!(s_tmp, edge) end - else - append!(edge_metadata, [nothing for _ in 1:length(edges)]) end - return Hypergraph{T,V,E,D}(length(nodes), length(edges), node_metadata, edge_metadata) + return Hypergraph{T,V,E,D}(n, k, node_metadata, edge_metadata) end From f716a3f27e15341d8b0f9fbb2af66ae966d18709 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Sun, 6 Apr 2025 13:55:57 +0200 Subject: [PATCH 15/23] update --- src/SimpleHypergraphs.jl | 4 +- src/{hif/hif_load.jl => hif.jl} | 131 ++++++++++++++++++++++++++++++++ src/hif/hif.jl | 1 - src/hif/hif_save.jl | 128 ------------------------------- 4 files changed, 132 insertions(+), 132 deletions(-) rename src/{hif/hif_load.jl => hif.jl} (59%) delete mode 100644 src/hif/hif.jl delete mode 100644 src/hif/hif_save.jl diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl index 853c657..33cfe71 100644 --- a/src/SimpleHypergraphs.jl +++ b/src/SimpleHypergraphs.jl @@ -78,9 +78,7 @@ include("abstracttypes.jl") include("hypergraph.jl") include("io.jl") -include("hif/hif.jl") -include("hif/hif_load.jl") -include("hif/hif_save.jl") +include("hif.jl") include("models/bipartite.jl") include("models/twosection.jl") diff --git a/src/hif/hif_load.jl b/src/hif.jl similarity index 59% rename from src/hif/hif_load.jl rename to src/hif.jl index fa6e031..16bb756 100644 --- a/src/hif/hif_load.jl +++ b/src/hif.jl @@ -1,5 +1,8 @@ using JSON3 + +struct HIF_Format <: Abstract_HG_format end + """ hg_load( io::IO, @@ -160,3 +163,131 @@ function hg_load( ) where {U<:Real} open(io -> hg_load(io, format; T = T, D = D, V = V, E = E), fname, "r") end + + +HIFEntryType = Dict{String, Union{String, Number, Dict{String, Any}}} + +""" + hg_save(io::IO, h::Hypergraph, format::HIF_Format) + +Saves a hypergraph `h` to an output stream `io` in `HIF` format. + +If `h` has `Composite Types` either for vertex metadata or hyperedges metadata, +the user has to explicit tell the JSON3 package about it, for instance using: + +`JSON3.StructType(::Type{MyType}) = JSON3.Struct()`. + +See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more details. + +""" +function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T, V, E, D} + _ = format + + json_hg = Dict{Symbol,Any}() + + nodes_meta = prepare_metadata(h.v_meta, handle_node) + edges_meta = prepare_metadata(h.he_meta, handle_edge) + + incidences = prepare_incidences(h) + + json_hg[:incidences] = incidences + + if !isempty(nodes_meta) + json_hg[:nodes] = nodes_meta + end + + if !isempty(edges_meta) + json_hg[:edges] = edges_meta + end + + JSON3.write(io, json_hg) +end + + +function prepare_incidences(h::Hypergraph{T, V, E, D}) where {T, V, E, D} + incidences = Vector{HIFEntryType}() + + for node_idx in eachindex(h.v_meta) + edges = gethyperedges(h, node_idx) + + node = isnothing(h.v_meta[node_idx]) ? node_idx : h.v_meta[node_idx] + + _node = (V == Dict{String, Any}) ? node["node"] : node + + for (edge, weight) in edges + _edge = isnothing(h.he_meta[edge]) ? 
edge : h.he_meta[edge] + if T == Bool + push!(incidences, Dict("edge" => _edge, "node" => _node)) + else + push!(incidences, Dict("edge" => _edge, "node" => _node, "weight" => weight)) + end + end + end + + return incidences +end + + +function prepare_metadata( + metadata::Vector{Union{T, Nothing}}, + handling_func::Function +) where {T} + result = Vector{HIFEntryType}() + + for item in metadata + if isnothing(item) + continue + end + + handled = handling_func(item) + push!(result, handled) + end + + return result +end + + +function handle_node(node::Union{String, Int}) + return Dict{String, Union{String, Int}}( + "node" => node + ) +end + +function handle_node(node::Dict{String, Any}) + result = HIFEntryType( + "node" => node["node"] + ) + + add_optional_params!(result, node) + + return result +end + + +function handle_edge(edge::Union{String, Int}) + return Dict{String, Union{String, Int}}( + "edge" => edge + ) +end + + +function handle_edge(edge::Dict{String, Any}) + result = HIFEntryType( + "edge" => edge["edge"] + ) + + add_optional_params!(result, edge) + + return result +end + + +function add_optional_params!(result::HIFEntryType, item::Dict{String, Any}) + if haskey(item, "weight") + result["weight"] = item["weight"] + end + + if haskey(item, "attrs") + result["attrs"] = item["attrs"] + end +end diff --git a/src/hif/hif.jl b/src/hif/hif.jl deleted file mode 100644 index 4c54f56..0000000 --- a/src/hif/hif.jl +++ /dev/null @@ -1 +0,0 @@ -struct HIF_Format <: Abstract_HG_format end diff --git a/src/hif/hif_save.jl b/src/hif/hif_save.jl deleted file mode 100644 index 2382dd0..0000000 --- a/src/hif/hif_save.jl +++ /dev/null @@ -1,128 +0,0 @@ -using JSON3 - -HIFEntryType = Dict{String, Union{String, Number, Dict{String, Any}}} - -""" - hg_save(io::IO, h::Hypergraph, format::HIF_Format) - -Saves a hypergraph `h` to an output stream `io` in `HIF` format. - -If `h` has `Composite Types` either for vertex metadata or hyperedges metadata, -the user has to explicit tell the JSON3 package about it, for instance using: - -`JSON3.StructType(::Type{MyType}) = JSON3.Struct()`. - -See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more details. - -""" -function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T, V, E, D} - _ = format - - json_hg = Dict{Symbol,Any}() - - nodes_meta = prepare_metadata(h.v_meta, handle_node) - edges_meta = prepare_metadata(h.he_meta, handle_edge) - - incidences = prepare_incidences(h) - - json_hg[:incidences] = incidences - - if !isempty(nodes_meta) - json_hg[:nodes] = nodes_meta - end - - if !isempty(edges_meta) - json_hg[:edges] = edges_meta - end - - JSON3.write(io, json_hg) -end - - -function prepare_incidences(h::Hypergraph{T, V, E, D}) where {T, V, E, D} - incidences = Vector{HIFEntryType}() - - for node_idx in eachindex(h.v_meta) - edges = gethyperedges(h, node_idx) - - node = isnothing(h.v_meta[node_idx]) ? node_idx : h.v_meta[node_idx] - - _node = (V == Dict{String, Any}) ? node["node"] : node - - for (edge, weight) in edges - _edge = isnothing(h.he_meta[edge]) ? 
edge : h.he_meta[edge] - if T == Bool - push!(incidences, Dict("edge" => _edge, "node" => _node)) - else - push!(incidences, Dict("edge" => _edge, "node" => _node, "weight" => weight)) - end - end - end - - return incidences -end - - -function prepare_metadata( - metadata::Vector{Union{T, Nothing}}, - handling_func::Function -) where {T} - result = Vector{HIFEntryType}() - - for item in metadata - if isnothing(item) - continue - end - - handled = handling_func(item) - push!(result, handled) - end - - return result -end - - -function handle_node(node::Union{String, Int}) - return Dict{String, Union{String, Int}}( - "node" => node - ) -end - -function handle_node(node::Dict{String, Any}) - result = HIFEntryType( - "node" => node["node"] - ) - - add_optional_params!(result, node) - - return result -end - - -function handle_edge(edge::Union{String, Int}) - return Dict{String, Union{String, Int}}( - "edge" => edge - ) -end - - -function handle_edge(edge::Dict{String, Any}) - result = HIFEntryType( - "edge" => edge["edge"] - ) - - add_optional_params!(result, edge) - - return result -end - - -function add_optional_params!(result::HIFEntryType, item::Dict{String, Any}) - if haskey(item, "weight") - result["weight"] = item["weight"] - end - - if haskey(item, "attrs") - result["attrs"] = item["attrs"] - end -end From d7908ddb9e006b4e66e95dc50467e2762369ab2d Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Sun, 6 Apr 2025 13:59:59 +0200 Subject: [PATCH 16/23] update --- src/SimpleHypergraphs.jl | 2 +- src/{hif.jl => io_hif.jl} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/{hif.jl => io_hif.jl} (100%) diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl index 33cfe71..1cfb3d4 100644 --- a/src/SimpleHypergraphs.jl +++ b/src/SimpleHypergraphs.jl @@ -78,7 +78,7 @@ include("abstracttypes.jl") include("hypergraph.jl") include("io.jl") -include("hif.jl") +include("io_hif.jl") include("models/bipartite.jl") include("models/twosection.jl") diff --git a/src/hif.jl b/src/io_hif.jl similarity index 100% rename from src/hif.jl rename to src/io_hif.jl From 5b470ff45f56139a44b4df37fc6be3e46eb6a6ac Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Wed, 9 Apr 2025 18:37:51 +0200 Subject: [PATCH 17/23] start adding tests --- test/runtests.jl | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index c3accef..f7a830f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -598,3 +598,26 @@ end; @test distance(h, SedgeDistanceDijkstra(2, 3, 3)) == 1 @test distance(h, SedgeDistanceDijkstra(1, 3, 3)) == typemax(Int) end; + + +@testset "SimpleHypergraphs HIF format " begin + + path = "data/HIF-standard/single_node.json" + + h = hg_load( + path, + HIF_Format(), + T=Bool, + V=Int, + E=Int, + ) + + path = "test.json" + hg_save(path, h, format=HIF_Format()) + loaded_hg = hg_load(path, HIF_Format(), T=Float64, V=Int, E=String) + + @test h == loaded_hg + @test loaded_hg.v_meta == h.v_meta + @test loaded_hg.he_meta == h.he_meta + +end From c13fe0e61784945235950562ba2a693802c57b20 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Sun, 11 May 2025 15:15:01 +0200 Subject: [PATCH 18/23] start refactor --- src/io_hif.jl | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/src/io_hif.jl b/src/io_hif.jl index 16bb756..f4f3543 100644 --- a/src/io_hif.jl +++ b/src/io_hif.jl @@ -3,6 +3,115 @@ using JSON3 struct HIF_Format <: Abstract_HG_format end + +function hg_load( + 
io::IO, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, + V::Type{Z} = Int, + E::Type{Z} = Int, + sort_by_id::bool=false, + show_warning::bool=true, +) where {U<:Real, Z<:Union{Int, String}} + data = JSON3.read(read(io, String), Dict{String, Any}) + + edges = build_edges_dataframe(data, E) + nodes = build_nodes_dataframe(data, V) + + add_nodes_and_edges_from_incidences!(data, edges, nodes, V, E) + + if sort_by_id + sort!(edges, (:edge)) + sort!(nodes, (:node)) + end + + if show_warning + if edges.edge != 1:nrow(edges) + @warn "" # TODO: Add warning message + end + + if nodes.node != 1:nrow(nodes) + @warn "" # TODO: Add warning message + end + end + + hg = Hypergraph{T, V, E, D}(nrow(nodes), nrow(edges)) + + hg +end + + +function build_edges_dataframe( + data::Dict{String, Any}, + E::Type{Z} +) where {Z<:Union{Int, String}} + edges = DataFrame( + ; + edge=E[], + weight=Union{Missing, Float64}[], + attrs=Union{Missing, Dict{String, Any}}[] + ) + + for edge in data["edges"] + weight = (haskey(edge, "weight")) ? edge["weight"] : missing + attrs = (haskey(edge, "attrs")) ? edge["attrs"] : missing + + push!(edges, [edge["edge"], weight, attrs]) + end + + edges +end + +function build_nodes_dataframe( + data::Dict{String, Any}, + V::Type{Z} +) where {Z<:Union{Int, String}} + nodes = DataFrame( + ; + node=V[], + weight=Union{Missing, Float64}[], + attrs=Union{Missing, Dict{String, Any}}[] + ) + + for node in data["nodes"] + weight = (haskey(node, "weight")) ? node["weight"] : missing + attrs = (haskey(node, "attrs")) ? node["attrs"] : missing + + push!(nodes, [node["node"], weight, attrs]) + end + + nodes +end + + +function add_nodes_and_edges_from_incidences!( + data::Dict{String, Any}, + edges::DataFrame, + nodes::DataFrame, + V::Type{Z}, + E::Type{Z} +) where {Z<:Union{Int, String}} + edge_ids = Set{E}(edges.edge) + node_ids = Set{V}(nodes.node) + + for incidence in data["incidences"] + node = incidence["node"] + edge = incidence["edge"] + + if node ∉ node_ids + push!(nodes, [node, missing, missing]) + push!(node_ids, node) + end + + if edge ∉ edge_ids + push!(edges, [edge, missing, missing]) + push!(edge_ids, edge) + end + + end +end + """ hg_load( io::IO, From d3bb22788cb32bb694f856a0777137c23a1f0429 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Mon, 19 May 2025 21:52:03 +0200 Subject: [PATCH 19/23] update --- Project.toml | 2 + src/io_hif.jl | 212 +++++++++++++++----------------------------------- 2 files changed, 64 insertions(+), 150 deletions(-) diff --git a/Project.toml b/Project.toml index db60d1b..102c676 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.3.0" [deps] Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" @@ -18,6 +19,7 @@ StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" [compat] Conda = "^1.5.0" +DataFrames = "1.7.0" DataStructures = "^0.18.11" Graphs = "^1.4.1" JSON3 = "^1.0.1" diff --git a/src/io_hif.jl b/src/io_hif.jl index f4f3543..6f8a065 100644 --- a/src/io_hif.jl +++ b/src/io_hif.jl @@ -1,4 +1,5 @@ using JSON3 +using DataFrames struct HIF_Format <: Abstract_HG_format end @@ -11,8 +12,8 @@ function hg_load( D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, V::Type{Z} = Int, E::Type{Z} = Int, - sort_by_id::bool=false, - show_warning::bool=true, + sort_by_id::Bool=false, + 
show_warning::Bool=true, ) where {U<:Real, Z<:Union{Int, String}} data = JSON3.read(read(io, String), Dict{String, Any}) @@ -36,12 +37,62 @@ function hg_load( end end - hg = Hypergraph{T, V, E, D}(nrow(nodes), nrow(edges)) + v_meta = Vector{Union{V, Dict{String, Any}}}() + he_meta = Vector{Union{V, Dict{String, Any}}}() + + for row in eachrow(nodes) + attrs = row.attrs + if isnothing(attrs) + attrs = row.node + end + push!(v_meta, attrs) + end + + for row in eachrow(edges) + attrs = row.attrs + if isnothing(attrs) + attrs = row.edge + end + + push!(he_meta, attrs) + end + + hg = Hypergraph{ + T, + Union{V, Dict{String, Any}}, + Union{E, Dict{String, Any}}, + D, + }(nrow(nodes), nrow(edges), v_meta, he_meta) + + add_weights_from_incidences!(data, hg, edges, nodes, V, E) hg end +function add_weights_from_incidences!( + data::Dict{String, Any}, + hg::Hypergraph, + edges::DataFrame, + nodes::DataFrame, + V::Type{Z}, + E::Type{Z} +) where {Z<:Union{Int, String}} + edge_dict = Dict{E, Int}(row.edge => idx for (row, idx) in zip(eachrow(edges), 1:nrow(edges))) + node_dict = Dict{V, Int}(row.node => idx for (row, idx) in zip(eachrow(nodes), 1:nrow(nodes))) + + incidences = data["incidences"] + + for inc in incidences + edge_idx = edge_dict[inc["edge"]] + node_idx = node_dict[inc["node"]] + + weight = (haskey(inc, "weight")) ? inc["weight"] : 1 + + hg[node_idx, edge_idx] = weight + end +end + function build_edges_dataframe( data::Dict{String, Any}, E::Type{Z} @@ -49,15 +100,13 @@ function build_edges_dataframe( edges = DataFrame( ; edge=E[], - weight=Union{Missing, Float64}[], - attrs=Union{Missing, Dict{String, Any}}[] + attrs=Union{Nothing, Dict{String, Any}}[] ) for edge in data["edges"] - weight = (haskey(edge, "weight")) ? edge["weight"] : missing - attrs = (haskey(edge, "attrs")) ? edge["attrs"] : missing + attrs = (haskey(edge, "attrs")) ? edge["attrs"] : nothing - push!(edges, [edge["edge"], weight, attrs]) + push!(edges, [edge["edge"], attrs]) end edges @@ -70,15 +119,13 @@ function build_nodes_dataframe( nodes = DataFrame( ; node=V[], - weight=Union{Missing, Float64}[], - attrs=Union{Missing, Dict{String, Any}}[] + attrs=Union{Nothing, Dict{String, Any}}[] ) for node in data["nodes"] - weight = (haskey(node, "weight")) ? node["weight"] : missing - attrs = (haskey(node, "attrs")) ? node["attrs"] : missing + attrs = (haskey(node, "attrs")) ? node["attrs"] : nothing - push!(nodes, [node["node"], weight, attrs]) + push!(nodes, [node["node"], attrs]) end nodes @@ -100,153 +147,18 @@ function add_nodes_and_edges_from_incidences!( edge = incidence["edge"] if node ∉ node_ids - push!(nodes, [node, missing, missing]) + push!(nodes, [node, nothing]) push!(node_ids, node) end if edge ∉ edge_ids - push!(edges, [edge, missing, missing]) + push!(edges, [edge, nothing]) push!(edge_ids, edge) end end end -""" - hg_load( - io::IO, - format::HIF_Format; - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, - ) where {U <: Real} - -Loads a hypergraph from a stream `io` from `HIF` format. 
-More info: https://github.com/pszufe/HIF-standard - -**Arguments** - -* `T` : type of weight values stored in the hypergraph's adjacency matrix -* `D` : dictionary for storing values the default is `Dict{Int, T}` -""" -function hg_load( - io::IO, - format::HIF_Format; - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V = Nothing, - E = Nothing -) where {U<:Real} - data = JSON3.read(read(io, String), Dict{String, Any}) - - if !haskey(data, "incidences") - throw(ArgumentError("Invalid JSON schema: missing required key 'incidences'")) - end - - nodes, edges = get_nodes_and_edges(data, V, E) - - h = init_hypergraph(data, nodes, edges, T, D, V, E) - - add_weights_from_incidences!(h, data["incidences"], nodes, edges) - - h -end - - -function init_hypergraph( - data::Dict{String, Any}, - nodes::AbstractVector{Union{String, Int}}, - edges::AbstractVector{Union{String, Int}}, - T::Type{U}, - D::Type{<:AbstractDict{Int,U}}, - V = Nothing, - E = Nothing -) where {U<:Real} - n = haskey(data, "nodes") ? max(length(nodes), length(data["nodes"])) : length(nodes) - k = haskey(data, "edges") ? max(length(edges), length(data["edges"])) : length(edges) - - node_metadata = Vector{Union{V, Nothing}}([nothing for _ in 1:n]) - edge_metadata = Vector{Union{E, Nothing}}([nothing for _ in 1:k]) - - if haskey(data, "nodes") - tmp = [node_obj["node"] for node_obj in data["nodes"]] - s_tmp = Set{V}() - - for (i, node) in pairs(tmp) - if node in s_tmp - continue - end - - node_metadata[i] = node - push!(s_tmp, node) - end - end - - if haskey(data, "edges") - tmp = [edge_obj["edge"] for edge_obj in data["edges"]] - s_tmp = Set{E}() - - for (i, edge) in pairs(tmp) - if edge in s_tmp - continue - end - - edge_metadata[i] = edge - push!(s_tmp, edge) - end - end - - return Hypergraph{T,V,E,D}(n, k, node_metadata, edge_metadata) -end - - -function get_nodes_and_edges(data::Dict{String, Any}, V, E) - node_set = Set{Union{String, Int}}() - edge_set = Set{Union{String, Int}}() - - nodes = Vector{Union{String, Int}}() - edges = Vector{Union{String, Int}}() - - for inc in data["incidences"] - node = (V == String) ? string(inc["node"]) : inc["node"] - edge = (E == String) ? string(inc["edge"]) : inc["edge"] - - if node ∉ node_set - push!(node_set, node) - push!(nodes, node) - end - - if edge ∉ edge_set - push!(edge_set, edge) - push!(edges, edge) - end - end - - sort!(nodes) - sort!(edges) - - return nodes, edges -end - - -function add_weights_from_incidences!( - h::Hypergraph{T,V,E,D}, - incidences::AbstractVector, - nodes::Vector{Union{String, Int}}, - edges::Vector{Union{String, Int}} - ) where {T, V, E, D} - node_dict = Dict(val => i for (i, val) in pairs(nodes)) - edge_dict = Dict(val => i for (i, val) in pairs(edges)) - - for inc in incidences - node = (V == String) ? string(inc["node"]) : inc["node"] - edge = (E == String) ? string(inc["edge"]) : inc["edge"] - node_idx = node_dict[node] - he_idx = edge_dict[edge] - - h[node_idx, he_idx] = haskey(inc, "weight") ? 
inc["weight"] : 1 - end -end - """ hg_load( From d1547d0ad4ba28aea61efcd6380ef25aff533284 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Sat, 31 May 2025 17:21:59 +0200 Subject: [PATCH 20/23] fix for no incidences and edges as string --- src/io_hif.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/io_hif.jl b/src/io_hif.jl index 6f8a065..77b9381 100644 --- a/src/io_hif.jl +++ b/src/io_hif.jl @@ -10,8 +10,8 @@ function hg_load( format::HIF_Format; T::Type{U} = Bool, D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V::Type{Z} = Int, - E::Type{Z} = Int, + V::Type{Z} = Union{String, Int}, + E::Type{Z} = Union{String, Int}, sort_by_id::Bool=false, show_warning::Bool=true, ) where {U<:Real, Z<:Union{Int, String}} @@ -103,6 +103,10 @@ function build_edges_dataframe( attrs=Union{Nothing, Dict{String, Any}}[] ) + if !haskey(data, "edges") + return edges + end + for edge in data["edges"] attrs = (haskey(edge, "attrs")) ? edge["attrs"] : nothing @@ -122,6 +126,10 @@ function build_nodes_dataframe( attrs=Union{Nothing, Dict{String, Any}}[] ) + if !haskey(data, "nodes") + return nodes + end + for node in data["nodes"] attrs = (haskey(node, "attrs")) ? node["attrs"] : nothing From 60dadb0276e69a853da702fb3b68f7f562ad1f9b Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Mon, 2 Jun 2025 21:38:57 +0200 Subject: [PATCH 21/23] add saving and tests --- src/io_hif.jl | 153 ++++++------------ test/data/HIF-standard/single_edge.json | 8 - .../HIF-standard/single_edge_with_attrs.json | 12 -- test/data/HIF-standard/single_node.json | 8 - .../HIF-standard/single_node_with_attrs.json | 13 -- test/runtests.jl | 35 +++- 6 files changed, 80 insertions(+), 149 deletions(-) delete mode 100644 test/data/HIF-standard/single_edge.json delete mode 100644 test/data/HIF-standard/single_edge_with_attrs.json delete mode 100644 test/data/HIF-standard/single_node.json delete mode 100644 test/data/HIF-standard/single_node_with_attrs.json diff --git a/src/io_hif.jl b/src/io_hif.jl index 77b9381..fbb4401 100644 --- a/src/io_hif.jl +++ b/src/io_hif.jl @@ -17,6 +17,21 @@ function hg_load( ) where {U<:Real, Z<:Union{Int, String}} data = JSON3.read(read(io, String), Dict{String, Any}) + haskey(data, "incidences") || throw(ArgumentError("Missing required attribute 'incidences'")) + + if isempty(data["incidences"]) + if isempty(get(data, "edges", [])) && isempty(get(data, "nodes", [])) + return Hypergraph{ + T, + Union{V, Dict{String, Any}}, + Union{E, Dict{String, Any}}, + D, + }(0, 0) + elseif isempty(data["edges"]) || isempty(data["nodes"]) + throw(ArgumentError("When incidences are empty, both 'nodes' and 'edges' must contain data")) + end + end + edges = build_edges_dataframe(data, E) nodes = build_nodes_dataframe(data, V) @@ -70,6 +85,21 @@ function hg_load( end +function hg_load( + fname::String, + format::HIF_Format; + T::Type{U} = Bool, + D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, + V::Type{Z} = Union{String, Int}, + E::Type{Z} = Union{String, Int}, + sort_by_id::Bool=false, + show_warning::Bool=true, +) where {U<:Real, Z<:Union{Int, String}} + open(io -> hg_load(io, format, T=T, D=D, V=V, E=E, sort_by_id=sort_by_id, show_warning=show_warning), fname, "r") +end + + + function add_weights_from_incidences!( data::Dict{String, Any}, hg::Hypergraph, @@ -107,10 +137,16 @@ function build_edges_dataframe( return edges end + seen = Set{Union{Int, String}}() + for edge in data["edges"] + if edge["edge"] ∈ seen + continue + end attrs = (haskey(edge, "attrs")) ? 
edge["attrs"] : nothing push!(edges, [edge["edge"], attrs]) + push!(seen, edge["edge"]) end edges @@ -130,10 +166,17 @@ function build_nodes_dataframe( return nodes end + seen = Set{Union{String, Int}}() + for node in data["nodes"] + if node["node"] ∈ seen + continue + end + attrs = (haskey(node, "attrs")) ? node["attrs"] : nothing push!(nodes, [node["node"], attrs]) + push!(seen, node["node"]) end nodes @@ -149,7 +192,6 @@ function add_nodes_and_edges_from_incidences!( ) where {Z<:Union{Int, String}} edge_ids = Set{E}(edges.edge) node_ids = Set{V}(nodes.node) - for incidence in data["incidences"] node = incidence["node"] edge = incidence["edge"] @@ -194,8 +236,6 @@ function hg_load( end -HIFEntryType = Dict{String, Union{String, Number, Dict{String, Any}}} - """ hg_save(io::IO, h::Hypergraph, format::HIF_Format) @@ -212,111 +252,22 @@ See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more de function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T, V, E, D} _ = format - json_hg = Dict{Symbol,Any}() - - nodes_meta = prepare_metadata(h.v_meta, handle_node) - edges_meta = prepare_metadata(h.he_meta, handle_edge) - - incidences = prepare_incidences(h) - - json_hg[:incidences] = incidences - - if !isempty(nodes_meta) - json_hg[:nodes] = nodes_meta - end - - if !isempty(edges_meta) - json_hg[:edges] = edges_meta - end - - JSON3.write(io, json_hg) -end - + incidences = Vector{Dict{String, Union{String, Int}}}() -function prepare_incidences(h::Hypergraph{T, V, E, D}) where {T, V, E, D} - incidences = Vector{HIFEntryType}() + for i in 1:length(h.v_meta) + for j in 1:length(h.he_meta) - for node_idx in eachindex(h.v_meta) - edges = gethyperedges(h, node_idx) - - node = isnothing(h.v_meta[node_idx]) ? node_idx : h.v_meta[node_idx] - - _node = (V == Dict{String, Any}) ? node["node"] : node - - for (edge, weight) in edges - _edge = isnothing(h.he_meta[edge]) ? 
edge : h.he_meta[edge] - if T == Bool - push!(incidences, Dict("edge" => _edge, "node" => _node)) - else - push!(incidences, Dict("edge" => _edge, "node" => _node, "weight" => weight)) + if isnothing(h[i, j]) + continue end - end - end - - return incidences -end + weight = h[i, j] -function prepare_metadata( - metadata::Vector{Union{T, Nothing}}, - handling_func::Function -) where {T} - result = Vector{HIFEntryType}() - - for item in metadata - if isnothing(item) - continue + push!(incidences, Dict{String, Union{String, Int}}("edge" => i, "node" => j, "weight" => Int(weight))) end - - handled = handling_func(item) - push!(result, handled) end - return result -end - - -function handle_node(node::Union{String, Int}) - return Dict{String, Union{String, Int}}( - "node" => node - ) -end - -function handle_node(node::Dict{String, Any}) - result = HIFEntryType( - "node" => node["node"] - ) - - add_optional_params!(result, node) + json_hg = Dict{Symbol, Any}(:incidences => incidences) - return result -end - - -function handle_edge(edge::Union{String, Int}) - return Dict{String, Union{String, Int}}( - "edge" => edge - ) -end - - -function handle_edge(edge::Dict{String, Any}) - result = HIFEntryType( - "edge" => edge["edge"] - ) - - add_optional_params!(result, edge) - - return result -end - - -function add_optional_params!(result::HIFEntryType, item::Dict{String, Any}) - if haskey(item, "weight") - result["weight"] = item["weight"] - end - - if haskey(item, "attrs") - result["attrs"] = item["attrs"] - end + JSON3.write(io, json_hg) end diff --git a/test/data/HIF-standard/single_edge.json b/test/data/HIF-standard/single_edge.json deleted file mode 100644 index 6688947..0000000 --- a/test/data/HIF-standard/single_edge.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "incidences": [], - "edges": [ - { - "edge": 3 - } - ] -} \ No newline at end of file diff --git a/test/data/HIF-standard/single_edge_with_attrs.json b/test/data/HIF-standard/single_edge_with_attrs.json deleted file mode 100644 index 4e5871e..0000000 --- a/test/data/HIF-standard/single_edge_with_attrs.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "incidences": [], - "edges": [ - { - "edge": 3, - "attrs": { - "timestamp": "2020-04-01", - "weight": 2.0 - } - } - ] -} \ No newline at end of file diff --git a/test/data/HIF-standard/single_node.json b/test/data/HIF-standard/single_node.json deleted file mode 100644 index 54987b2..0000000 --- a/test/data/HIF-standard/single_node.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "incidences": [], - "nodes": [ - { - "node": 42 - } - ] -} \ No newline at end of file diff --git a/test/data/HIF-standard/single_node_with_attrs.json b/test/data/HIF-standard/single_node_with_attrs.json deleted file mode 100644 index efb03ef..0000000 --- a/test/data/HIF-standard/single_node_with_attrs.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "incidences": [], - "nodes": [ - { - "node": 42, - "attrs": { - "weight": 2, - "color": "blue", - "online": true - } - } - ] -} \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index f7a830f..c77107b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,13 +8,34 @@ using DataStructures import Graphs -h1 = Hypergraph{Float64, Int, String}(5,4) -h1[1:3,1] .= 1.5 -h1[3,4] = 2.5 -h1[2,3] = 3.5 -h1[4,3:4] .= 4.5 -h1[5,4] = 5.5 -h1[5,2] = 6.5 +@testset "HIF test" begin + dir = "data/HIF-standard" + + for file in readdir(dir) + full_path = joinpath(dir, file) + + endswith(file, ".json") || continue + + @testset "File: $file" begin + h = hg_load(full_path, HIF_Format(), T=Real) + + 
io_h = IOBuffer() + + hg_save(io_h, h, HIF_Format()) + + seekstart(io_h) + + h_loaded = hg_load(io_h, HIF_Format(), T=Real) + + @test h == h_loaded + end + end +end + + + @test h1 == loaded_hg + @test h1.v_meta == loaded_hg.v_meta + @test h1.he_meta == loaded_hg.he_meta @testset "SimpleHypergraphs Hypergraph " begin From 64f51b3c213b92d8b62319efb4eadd3e29898843 Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Tue, 3 Jun 2025 19:48:45 +0200 Subject: [PATCH 22/23] fix last corner case --- src/io_hif.jl | 15 +++++++++++++-- test/runtests.jl | 33 +++++++-------------------------- 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/src/io_hif.jl b/src/io_hif.jl index fbb4401..5ac70a5 100644 --- a/src/io_hif.jl +++ b/src/io_hif.jl @@ -44,11 +44,11 @@ function hg_load( if show_warning if edges.edge != 1:nrow(edges) - @warn "" # TODO: Add warning message + @warn "Edges in the source file were not sorted - their order was changed." end if nodes.node != 1:nrow(nodes) - @warn "" # TODO: Add warning message + @warn "Nodes in the source file were not sorted - their order was changed" end end @@ -114,13 +114,24 @@ function add_weights_from_incidences!( incidences = data["incidences"] for inc in incidences + haskey(edge_dict, inc["edge"]) || continue # duplicates + haskey(node_dict, inc["node"]) || continue # duplicates edge_idx = edge_dict[inc["edge"]] node_idx = node_dict[inc["node"]] weight = (haskey(inc, "weight")) ? inc["weight"] : 1 hg[node_idx, edge_idx] = weight + + pop!(edge_dict, inc["edge"]) + pop!(node_dict, inc["node"]) + end + + # add remaining items + for (node, edge) in zip(values(node_dict), values(edge_dict)) + hg[node, edge] = 1 end + end function build_edges_dataframe( diff --git a/test/runtests.jl b/test/runtests.jl index c77107b..0983c50 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -33,9 +33,13 @@ import Graphs end - @test h1 == loaded_hg - @test h1.v_meta == loaded_hg.v_meta - @test h1.he_meta == loaded_hg.he_meta +h1 = Hypergraph{Float64, Int, String}(5,4) +h1[1:3,1] .= 1.5 +h1[3,4] = 2.5 +h1[2,3] = 3.5 +h1[4,3:4] .= 4.5 +h1[5,4] = 5.5 +h1[5,2] = 6.5 @testset "SimpleHypergraphs Hypergraph " begin @@ -619,26 +623,3 @@ end; @test distance(h, SedgeDistanceDijkstra(2, 3, 3)) == 1 @test distance(h, SedgeDistanceDijkstra(1, 3, 3)) == typemax(Int) end; - - -@testset "SimpleHypergraphs HIF format " begin - - path = "data/HIF-standard/single_node.json" - - h = hg_load( - path, - HIF_Format(), - T=Bool, - V=Int, - E=Int, - ) - - path = "test.json" - hg_save(path, h, format=HIF_Format()) - loaded_hg = hg_load(path, HIF_Format(), T=Float64, V=Int, E=String) - - @test h == loaded_hg - @test loaded_hg.v_meta == h.v_meta - @test loaded_hg.he_meta == h.he_meta - -end From 64eb4bdbb27104d7d06e835fbb8e32f2c44e8efd Mon Sep 17 00:00:00 2001 From: Aleksander Wojnarowicz Date: Thu, 5 Jun 2025 09:42:03 +0200 Subject: [PATCH 23/23] update --- src/io_hif.jl | 112 +++++++++++++------------------------------------- 1 file changed, 28 insertions(+), 84 deletions(-) diff --git a/src/io_hif.jl b/src/io_hif.jl index 5ac70a5..aa857be 100644 --- a/src/io_hif.jl +++ b/src/io_hif.jl @@ -10,11 +10,9 @@ function hg_load( format::HIF_Format; T::Type{U} = Bool, D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V::Type{Z} = Union{String, Int}, - E::Type{Z} = Union{String, Int}, sort_by_id::Bool=false, show_warning::Bool=true, -) where {U<:Real, Z<:Union{Int, String}} +) where {U<:Real} data = JSON3.read(read(io, String), Dict{String, Any}) haskey(data, "incidences") || 
throw(ArgumentError("Missing required attribute 'incidences'")) @@ -23,8 +21,8 @@ function hg_load( if isempty(get(data, "edges", [])) && isempty(get(data, "nodes", [])) return Hypergraph{ T, - Union{V, Dict{String, Any}}, - Union{E, Dict{String, Any}}, + Union{Union{String, Int}, Dict{String, Any}}, + Union{Union{String, Int}, Dict{String, Any}}, D, }(0, 0) elseif isempty(data["edges"]) || isempty(data["nodes"]) @@ -32,10 +30,10 @@ function hg_load( end end - edges = build_edges_dataframe(data, E) - nodes = build_nodes_dataframe(data, V) + edges = build_edges_dataframe(data) + nodes = build_nodes_dataframe(data) - add_nodes_and_edges_from_incidences!(data, edges, nodes, V, E) + add_nodes_and_edges_from_incidences!(data, edges, nodes) if sort_by_id sort!(edges, (:edge)) @@ -52,8 +50,8 @@ function hg_load( end end - v_meta = Vector{Union{V, Dict{String, Any}}}() - he_meta = Vector{Union{V, Dict{String, Any}}}() + v_meta = Vector{Union{Union{String, Int}, Dict{String, Any}}}() + he_meta = Vector{Union{Union{String, Int}, Dict{String, Any}}}() for row in eachrow(nodes) attrs = row.attrs @@ -74,12 +72,12 @@ function hg_load( hg = Hypergraph{ T, - Union{V, Dict{String, Any}}, - Union{E, Dict{String, Any}}, + Union{Union{String, Int}, Dict{String, Any}}, + Union{Union{String, Int}, Dict{String, Any}}, D, }(nrow(nodes), nrow(edges), v_meta, he_meta) - add_weights_from_incidences!(data, hg, edges, nodes, V, E) + add_weights_from_incidences!(data, hg, edges, nodes) hg end @@ -90,12 +88,10 @@ function hg_load( format::HIF_Format; T::Type{U} = Bool, D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V::Type{Z} = Union{String, Int}, - E::Type{Z} = Union{String, Int}, sort_by_id::Bool=false, show_warning::Bool=true, -) where {U<:Real, Z<:Union{Int, String}} - open(io -> hg_load(io, format, T=T, D=D, V=V, E=E, sort_by_id=sort_by_id, show_warning=show_warning), fname, "r") +) where {U<:Real} + open(io -> hg_load(io, format, T=T, D=D, sort_by_id=sort_by_id, show_warning=show_warning), fname, "r") end @@ -105,17 +101,13 @@ function add_weights_from_incidences!( hg::Hypergraph, edges::DataFrame, nodes::DataFrame, - V::Type{Z}, - E::Type{Z} -) where {Z<:Union{Int, String}} - edge_dict = Dict{E, Int}(row.edge => idx for (row, idx) in zip(eachrow(edges), 1:nrow(edges))) - node_dict = Dict{V, Int}(row.node => idx for (row, idx) in zip(eachrow(nodes), 1:nrow(nodes))) +) + edge_dict = Dict{Union{String, Int}, Int}(row.edge => idx for (row, idx) in zip(eachrow(edges), 1:nrow(edges))) + node_dict = Dict{Union{String, Int}, Int}(row.node => idx for (row, idx) in zip(eachrow(nodes), 1:nrow(nodes))) incidences = data["incidences"] for inc in incidences - haskey(edge_dict, inc["edge"]) || continue # duplicates - haskey(node_dict, inc["node"]) || continue # duplicates edge_idx = edge_dict[inc["edge"]] node_idx = node_dict[inc["node"]] @@ -123,24 +115,16 @@ function add_weights_from_incidences!( hg[node_idx, edge_idx] = weight - pop!(edge_dict, inc["edge"]) - pop!(node_dict, inc["node"]) - end - - # add remaining items - for (node, edge) in zip(values(node_dict), values(edge_dict)) - hg[node, edge] = 1 end end function build_edges_dataframe( data::Dict{String, Any}, - E::Type{Z} -) where {Z<:Union{Int, String}} +) edges = DataFrame( ; - edge=E[], + edge=Union{String, Int}[], attrs=Union{Nothing, Dict{String, Any}}[] ) @@ -165,11 +149,10 @@ end function build_nodes_dataframe( data::Dict{String, Any}, - V::Type{Z} -) where {Z<:Union{Int, String}} +) nodes = DataFrame( ; - node=V[], + node=Union{String, Int}[], 
attrs=Union{Nothing, Dict{String, Any}}[] ) @@ -198,11 +181,9 @@ function add_nodes_and_edges_from_incidences!( data::Dict{String, Any}, edges::DataFrame, nodes::DataFrame, - V::Type{Z}, - E::Type{Z} -) where {Z<:Union{Int, String}} - edge_ids = Set{E}(edges.edge) - node_ids = Set{V}(nodes.node) +) + edge_ids = Set{Union{String, Int}}(edges.edge) + node_ids = Set{Union{String, Int}}(nodes.node) for incidence in data["incidences"] node = incidence["node"] edge = incidence["edge"] @@ -221,32 +202,6 @@ function add_nodes_and_edges_from_incidences!( end -""" - hg_load( - fname::AbstractString; - format::Abstract_HG_format = HIF_Format(), - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int,U}, - ) where {U <: Real} - ) -Loads a hypergraph from a file `fname`. -The default saving format is `json`. -**Arguments** -* `T` : type of weight values stored in the hypergraph's adjacency matrix -* `D` : dictionary for storing values the default is `Dict{Int, T}` -""" -function hg_load( - fname::AbstractString, - format::HIF_Format; - T::Type{U} = Bool, - D::Type{<:AbstractDict{Int, U}} = Dict{Int, T}, - V = Nothing, - E = Nothing -) where {U<:Real} - open(io -> hg_load(io, format; T = T, D = D, V = V, E = E), fname, "r") -end - - """ hg_save(io::IO, h::Hypergraph, format::HIF_Format) @@ -261,24 +216,13 @@ See the (JSON3.jl documentation)[https://github.com/quinnj/JSON3.jl] for more de """ function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T, V, E, D} - _ = format - - incidences = Vector{Dict{String, Union{String, Int}}}() - - for i in 1:length(h.v_meta) - for j in 1:length(h.he_meta) - - if isnothing(h[i, j]) - continue - end - + incidences = Vector{Dict{String, Union{String, Int, T}}}() + for i in 1:nhv(h) + for j in sort!(collect(keys(gethyperedges(h, i)))) weight = h[i, j] - - push!(incidences, Dict{String, Union{String, Int}}("edge" => i, "node" => j, "weight" => Int(weight))) + push!(incidences, Dict{String, Union{String, Int, T}}("edge" => i, "node" => j, "weight" => T(weight))) end end - - json_hg = Dict{Symbol, Any}(:incidences => incidences) - + json_hg = Dict{Symbol, typeof(incidences)}(:incidences => incidences) JSON3.write(io, json_hg) end
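
A minimal usage sketch of the HIF round trip as it stands after the last
patch above. It relies only on signatures introduced in this series
(hg_save(io, h, HIF_Format()) and hg_load(fname, HIF_Format(); T, D,
sort_by_id, show_warning)); the file name, element types, and weights below
are illustrative assumptions, not part of the patches.

using SimpleHypergraphs

# Build a small weighted hypergraph (5 vertices, 4 hyperedges),
# mirroring the fixture used in test/runtests.jl.
h = Hypergraph{Float64, Int, String}(5, 4)
h[1:3, 1] .= 1.5
h[3, 4] = 2.5

# Write the hypergraph to a HIF JSON file; as of the last patch only the
# incidences (with weights) are emitted.
open("example_hif.json", "w") do io    # hypothetical path, for illustration only
    hg_save(io, h, HIF_Format())
end

# Read it back. T sets the weight type; sort_by_id reorders nodes/edges by id,
# and show_warning controls the warning emitted when the source file's
# node/edge ids are not already in 1:n order.
h2 = hg_load("example_hif.json", HIF_Format(); T=Float64)

Note that hg_load returns a Hypergraph whose vertex and hyperedge metadata
are typed Union{Union{String, Int}, Dict{String, Any}}, so h2 is not expected
to compare equal to h above; the equality checks in test/runtests.jl only
cover hypergraphs that were themselves loaded from HIF files.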