Skip to content

Commit 239469d

Browse files
committed
Add workflow for csv
1 parent 1bedc2c commit 239469d

File tree

4 files changed

+35
-7
lines changed

4 files changed

+35
-7
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"HT013166356, HT018625006, TT000577460","CI 5310","CI 5603","CI 1100","CI 1125","CI 5603","CI 5604","EC 2430","IH 34381"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"HT013166356, HT018625006, TT000577460","CI 5310","CI 5603","CI 1100","CI 1125","CI 5603","CI 5604","EC 2430","IH 34381"
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// This flux uses fix-cg-to-es.fix to select records with the ISIL DE-605.
2+
// This flux uses fix-cg-to-es.fix to filter records with holdings
3+
// by Isil DE-605 from culturegraph aggregate marcxml. It then builds
4+
// a concordance Id<->RVK which can be indexed directly into elasticsearch.
5+
// Snippet from the output JSON (produced when encode-json is used instead of encode-csv):
6+
//
7+
//{"index":{"_index":"cgrvk","_type":"rvk"}}
8+
//{"rvk":["CI 1100","5,1"],"hbzId":"HT018839495, HT018625006"}
9+
//
10+
// Use curl to bulk load the file:
11+
//
12+
// curl -XPOST --header 'Content-Type: application/x-ndjson' --data-binary @bulk.json 'http://localhost:9200/_bulk'
13+
14+
default outfile = FLUX_DIR + "bulk.json";
15+
default infile = FLUX_DIR + "aggregate_auslieferung_20191212.small.marcxml.gz";
16+
default fixfile = FLUX_DIR + "fix-cg-to-es.fix";
17+
18+
19+
infile
20+
| open-file
21+
| decode-xml
22+
| handle-marcxml
23+
| fix(fixfile)
24+
| encode-csv
25+
//encode-json
26+
| write(outfile)
27+
;

Concordance-RVK-Verbundbibliothek/fix-cg-to-es.fix

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
add_field("id","")
2-
set_array("@id")
1+
set_array("id")
32
set_array("rvk[]")
43

54
do list(path: "084??", "var": "$i")
@@ -8,14 +7,14 @@ do list(path: "084??", "var": "$i")
87
end
98
end
109

10+
1111
do list(path: "035??", "var": "$i")
1212
if any_match("$i.a", "^\\(DE-605\\)(.*)")
13-
copy_field("$i.a","@id.$append")
13+
copy_field("$i.a","id.$append")
1414
end
1515
end
16-
replace_all("@id.*","^\\(DE-605\\)(.*)","$1")
17-
join_field("@id",", ")
18-
move_field("@id","id")
16+
replace_all("id.*","^\\(DE-605\\)(.*)","$1")
17+
join_field("id",", ")
1918

2019
retain("rvk[]","id")
2120
vacuum()
@@ -25,7 +24,7 @@ unless exists("rvk[]")
2524
reject()
2625
end
2726

28-
# Filter records without hbz @ids
27+
# Filter records without hbz ids
2928
unless exists("id")
3029
reject()
3130
end

0 commit comments

Comments
 (0)