@@ -14,7 +14,8 @@ class HTML < Base
14
14
15
15
# Checks for required gem dependencies of HTML Importer
16
16
# Prepares the libraries used by the HTML importer: loads +open-uri+ and
# registers +nokogiri+ through +optional_gem+.
# NOTE(review): +optional_gem+ is defined outside this block; presumably it
# requires the gem and reports a helpful error when it is missing - confirm.
def initialize
  # Loaded here rather than at file level so the dependency is only pulled in
  # when an HTML importer is actually instantiated.
  require 'open-uri'
  optional_gem 'nokogiri'
end
19
20
20
21
# Reads from an HTML file / website
@@ -29,7 +30,7 @@ def initialize
29
30
# @example Reading from a website url file
30
31
# instance = Daru::IO::Importers::HTML.read('http://www.moneycontrol.com/')
31
32
# Fetches the HTML document at +path+, parses it with Nokogiri and keeps the
# parsed document in +@file_data+.
#
# @param path [String] path of a local HTML file, or URL of a website
# @return [self] the importer itself
def read(path)
  # URI.open is the open-uri entry point (Kernel#open no longer opens URLs
  # since Ruby 3.0). The block form closes the underlying handle as soon as
  # its content has been read, instead of leaving it to the garbage collector.
  @file_data = Nokogiri.parse(URI.open(path, &:read))
  self
end
35
36
@@ -72,25 +73,23 @@ def read(path)
72
73
# # 3 ITC 315.85 6.75 621.12
73
74
# # 4 HDFC 1598.85 50.95 553.91
74
75
# Extracts the tables of the document stored in +@file_data+, drops the ones
# that do not fit the given options, and converts the rest.
#
# @param match [String, nil] text that a table must contain to be kept;
#   +nil+ keeps every table
# @param order [#size, nil] user-supplied order; overrides the scraped order
#   and restricts the result to tables with that many columns
# @param index [#size, nil] user-supplied index; overrides the scraped index
#   and restricts the result to tables with that many rows
# @param name [Object, nil] user-supplied name; overrides the scraped name
# @return [Array] one converted table (see +table_to_dataframe+) per table
#   that passed the filters
def call(match: nil, order: nil, index: nil, name: nil)
  @match   = match
  @options = {name: name, index: index, order: order}

  @file_data
    .search('table')
    .map { |table| parse_table(table) }
    .compact # tables that could not be parsed come back as nil
    .select { |table| satisfy_dimension(table) && search(table) }
    .map { |table| decide_values(table, @options) }
    .map { |table| table_to_dataframe(table) }
end
85
87
86
88
private
87
89
88
90
# Allows user to override the scraped order / index / data
89
# Combines the scraped values with the user-supplied ones, giving priority
# to the user.
#
# For a key present in both hashes the user value wins unless it is +nil+
# (or +false+), in which case the scraped value is kept. Keys present in
# only one hash are copied unchanged. Neither argument is modified.
#
# @param scraped_val [Hash] values scraped from the table
# @param user_val [Hash] values passed by the user
# @return [Hash] a new hash with the decided values
def decide_values(scraped_val, user_val)
  scraped_val.merge(user_val) { |_key, scraped, user| user || scraped }
end
95
94
96
95
# Splits headers (all th tags) into order and index. Wherein,
@@ -121,15 +120,23 @@ def scrape_tag(table, tag)
121
120
[ arr , size ]
122
121
end
123
122
123
# Tells whether a scraped table has the dimensions implied by the user
# options stored in +@options+.
#
# When +:order+ is given, the first row of the table must have as many
# cells as +:order+ has entries; when +:index+ is given, the table must have
# as many rows as +:index+ has entries. Options that are not given impose no
# constraint.
#
# @param table [Hash] scraped table; +table[:data]+ holds its rows
# @return [Boolean] true when the table fits every given option
def satisfy_dimension(table)
  rows = table[:data]
  # A table without rows has no first row: treat it as having zero columns
  # rather than calling #size on nil.
  columns = (rows.first || []).size

  return false if @options[:order] && columns != @options[:order].size
  return false if @options[:index] && rows.size != @options[:index].size
  true
end
128
+
124
129
# Tells whether a table passes the +@match+ filter.
#
# @param table [#to_s] scraped table
# @return [Boolean] true when no match text was requested, or when the
#   string form of the table contains the match text
def search(table)
  @match.nil? || table.to_s.include?(@match)
end
127
132
128
133
# Builds a dataframe from a decided table by handing its rows, index, order
# and name to +Daru::DataFrame.rows+.
#
# @param table [Hash] table with +:data+, +:index+, +:order+ and +:name+ keys
# @return [Object] whatever +Daru::DataFrame.rows+ returns
def table_to_dataframe(table)
  Daru::DataFrame.rows(
    table[:data],
    index: table[:index],
    order: table[:order],
    name: table[:name]
  )
end
134
141
end
135
142
end
0 commit comments