From 5f51f49b854747420e076f4597066d3804970592 Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Tue, 13 May 2025 12:23:17 +0200 Subject: [PATCH 1/5] refactor: move images to a single directory --- .../images/browser-devtools-console-commands.png | Bin .../images/browser-devtools-console.png | Bin .../images/browser-devtools-element-selection.png | Bin .../images/browser-devtools-elements-tab.png | Bin .../images/browser-devtools-hover.png | Bin .../images/browser-devtools-wikipedia.png | Bin .../{challenge => }/images/crawlee-create.png | Bin .../images/csv-data-in-sheets.png | Bin .../images/devtools-clean-price.png | Bin .../images/devtools-cleaning-noise.png | Bin .../images/devtools-collection-class.png | Bin .../images/devtools-collection-product-hover.png | Bin .../images/devtools-collection-product-name.png | Bin .../images/devtools-collection-query-all.png | Bin .../images/devtools-collection-query-hover.png | Bin .../images/devtools-collection-query.png | Bin .../images/devtools-collection-warehouse.png | Bin .../images/devtools-count-products.png | Bin .../images/devtools-extract-product-price.png | Bin .../images/devtools-extract-product-title.png | Bin .../images/devtools-find-child-elements.png | Bin .../images/devtools-print-all-products.png | Bin .../images/devtools-print-parent-text.png | Bin .../images/devtools-product-titles.png | Bin .../images/devtools-split-price.png | Bin .../images/filtering-product-detail-link.png | Bin .../images/filtering-product-urls.png | Bin .../{crawling => }/images/filtering-regex-urls.png | Bin .../{crawling => }/images/headless-dynamic-data.png | Bin .../images/node-scraper-title.png | Bin .../{challenge => }/images/offers-page.jpg | Bin .../{crawling => }/images/scraping-title.png | Bin .../{challenge => }/images/view-offers-button.jpg | Bin .../images/vscode-create-file.png | Bin .../images/vscode-hello-world.png | Bin .../images/vscode-npm-init.png | Bin .../images/vscode-open-folder.png | Bin 
.../images/vscode-open-terminal.png | Bin .../images/vscode-test-setup.png | Bin .../images/vscode-type-module.png | Bin .../{crawling => }/images/warehouse-links.png | Bin .../{crawling => }/images/warehouse-store.png | Bin 42 files changed, 0 insertions(+), 0 deletions(-) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/browser-devtools-console-commands.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/browser-devtools-console.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/browser-devtools-element-selection.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/browser-devtools-elements-tab.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/browser-devtools-hover.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/browser-devtools-wikipedia.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{challenge => }/images/crawlee-create.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/csv-data-in-sheets.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-clean-price.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-cleaning-noise.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-collection-class.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-collection-product-hover.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-collection-product-name.png (100%) rename 
sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-collection-query-all.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-collection-query-hover.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-collection-query.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-collection-warehouse.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-count-products.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-extract-product-price.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-extract-product-title.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-find-child-elements.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-print-all-products.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-print-parent-text.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-product-titles.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/devtools-split-price.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling => }/images/filtering-product-detail-link.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling => }/images/filtering-product-urls.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling => }/images/filtering-regex-urls.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling => 
}/images/headless-dynamic-data.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/node-scraper-title.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{challenge => }/images/offers-page.jpg (100%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling => }/images/scraping-title.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{challenge => }/images/view-offers-button.jpg (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/vscode-create-file.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/vscode-hello-world.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/vscode-npm-init.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/vscode-open-folder.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/vscode-open-terminal.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/vscode-test-setup.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction => }/images/vscode-type-module.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling => }/images/warehouse-links.png (100%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling => }/images/warehouse-store.png (100%) diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-console-commands.png b/sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-console-commands.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-console-commands.png rename to 
sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-console-commands.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-console.png b/sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-console.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-console.png rename to sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-console.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-element-selection.png b/sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-element-selection.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-element-selection.png rename to sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-element-selection.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-elements-tab.png b/sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-elements-tab.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-elements-tab.png rename to sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-elements-tab.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-hover.png b/sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-hover.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-hover.png rename to sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-hover.png diff --git 
a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-wikipedia.png b/sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-wikipedia.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-wikipedia.png rename to sources/academy/webscraping/scraping_basics_javascript/images/browser-devtools-wikipedia.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/images/crawlee-create.png b/sources/academy/webscraping/scraping_basics_javascript/images/crawlee-create.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/images/crawlee-create.png rename to sources/academy/webscraping/scraping_basics_javascript/images/crawlee-create.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/csv-data-in-sheets.png b/sources/academy/webscraping/scraping_basics_javascript/images/csv-data-in-sheets.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/csv-data-in-sheets.png rename to sources/academy/webscraping/scraping_basics_javascript/images/csv-data-in-sheets.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-clean-price.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-clean-price.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-clean-price.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-clean-price.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-cleaning-noise.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-cleaning-noise.png similarity index 100% rename from 
sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-cleaning-noise.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-cleaning-noise.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-class.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-class.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-class.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-class.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-product-hover.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-product-hover.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-product-hover.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-product-hover.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-product-name.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-product-name.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-product-name.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-product-name.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query-all.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-query-all.png similarity index 100% rename from 
sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query-all.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-query-all.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query-hover.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-query-hover.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query-hover.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-query-hover.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-query.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-query.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-warehouse.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-warehouse.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-warehouse.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-collection-warehouse.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-count-products.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-count-products.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-count-products.png rename to 
sources/academy/webscraping/scraping_basics_javascript/images/devtools-count-products.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-extract-product-price.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-extract-product-price.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-extract-product-price.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-extract-product-price.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-extract-product-title.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-extract-product-title.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-extract-product-title.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-extract-product-title.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-find-child-elements.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-find-child-elements.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-find-child-elements.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-find-child-elements.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-print-all-products.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-print-all-products.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-print-all-products.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-print-all-products.png diff --git 
a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-print-parent-text.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-print-parent-text.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-print-parent-text.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-print-parent-text.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-product-titles.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-product-titles.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-product-titles.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-product-titles.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-split-price.png b/sources/academy/webscraping/scraping_basics_javascript/images/devtools-split-price.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-split-price.png rename to sources/academy/webscraping/scraping_basics_javascript/images/devtools-split-price.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-product-detail-link.png b/sources/academy/webscraping/scraping_basics_javascript/images/filtering-product-detail-link.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-product-detail-link.png rename to sources/academy/webscraping/scraping_basics_javascript/images/filtering-product-detail-link.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-product-urls.png 
b/sources/academy/webscraping/scraping_basics_javascript/images/filtering-product-urls.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-product-urls.png rename to sources/academy/webscraping/scraping_basics_javascript/images/filtering-product-urls.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-regex-urls.png b/sources/academy/webscraping/scraping_basics_javascript/images/filtering-regex-urls.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-regex-urls.png rename to sources/academy/webscraping/scraping_basics_javascript/images/filtering-regex-urls.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/headless-dynamic-data.png b/sources/academy/webscraping/scraping_basics_javascript/images/headless-dynamic-data.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/headless-dynamic-data.png rename to sources/academy/webscraping/scraping_basics_javascript/images/headless-dynamic-data.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/node-scraper-title.png b/sources/academy/webscraping/scraping_basics_javascript/images/node-scraper-title.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/node-scraper-title.png rename to sources/academy/webscraping/scraping_basics_javascript/images/node-scraper-title.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/images/offers-page.jpg b/sources/academy/webscraping/scraping_basics_javascript/images/offers-page.jpg similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/images/offers-page.jpg rename to sources/academy/webscraping/scraping_basics_javascript/images/offers-page.jpg 
diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/scraping-title.png b/sources/academy/webscraping/scraping_basics_javascript/images/scraping-title.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/scraping-title.png rename to sources/academy/webscraping/scraping_basics_javascript/images/scraping-title.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/images/view-offers-button.jpg b/sources/academy/webscraping/scraping_basics_javascript/images/view-offers-button.jpg similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/images/view-offers-button.jpg rename to sources/academy/webscraping/scraping_basics_javascript/images/view-offers-button.jpg diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-create-file.png b/sources/academy/webscraping/scraping_basics_javascript/images/vscode-create-file.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-create-file.png rename to sources/academy/webscraping/scraping_basics_javascript/images/vscode-create-file.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-hello-world.png b/sources/academy/webscraping/scraping_basics_javascript/images/vscode-hello-world.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-hello-world.png rename to sources/academy/webscraping/scraping_basics_javascript/images/vscode-hello-world.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-npm-init.png b/sources/academy/webscraping/scraping_basics_javascript/images/vscode-npm-init.png similarity index 100% rename from 
sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-npm-init.png rename to sources/academy/webscraping/scraping_basics_javascript/images/vscode-npm-init.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-open-folder.png b/sources/academy/webscraping/scraping_basics_javascript/images/vscode-open-folder.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-open-folder.png rename to sources/academy/webscraping/scraping_basics_javascript/images/vscode-open-folder.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-open-terminal.png b/sources/academy/webscraping/scraping_basics_javascript/images/vscode-open-terminal.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-open-terminal.png rename to sources/academy/webscraping/scraping_basics_javascript/images/vscode-open-terminal.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-test-setup.png b/sources/academy/webscraping/scraping_basics_javascript/images/vscode-test-setup.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-test-setup.png rename to sources/academy/webscraping/scraping_basics_javascript/images/vscode-test-setup.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-type-module.png b/sources/academy/webscraping/scraping_basics_javascript/images/vscode-type-module.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-type-module.png rename to sources/academy/webscraping/scraping_basics_javascript/images/vscode-type-module.png diff --git 
a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/warehouse-links.png b/sources/academy/webscraping/scraping_basics_javascript/images/warehouse-links.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/warehouse-links.png rename to sources/academy/webscraping/scraping_basics_javascript/images/warehouse-links.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/warehouse-store.png b/sources/academy/webscraping/scraping_basics_javascript/images/warehouse-store.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/warehouse-store.png rename to sources/academy/webscraping/scraping_basics_javascript/images/warehouse-store.png From 5bdf24023defd67ec52960f4951c0fc6a27c477a Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Tue, 13 May 2025 12:48:07 +0200 Subject: [PATCH 2/5] refactor: flat is better than nested, PEP20 said --- .../{introduction.md => 01_introduction.md} | 1 - .../{data_extraction/index.md => 02_data_extraction.md} | 1 - .../browser_devtools.md => 03_browser_devtools.md} | 1 - .../using_devtools.md => 04_using_devtools.md} | 1 - .../devtools_continued.md => 05_devtools_continued.md} | 1 - .../computer_preparation.md => 06_computer_preparation.md} | 1 - .../{data_extraction/project_setup.md => 07_project_setup.md} | 1 - .../node_js_scraper.md => 08_node_js_scraper.md} | 1 - .../node_continued.md => 09_node_continued.md} | 1 - .../{data_extraction/save_to_csv.md => 10_save_to_csv.md} | 1 - .../{crawling/index.md => 11_crawling.md} | 1 - ...ecap_extraction_basics.md => 12_recap_extraction_basics.md} | 3 +-- .../{crawling/finding_links.md => 13_finding_links.md} | 3 +-- .../{crawling/filtering_links.md => 14_filtering_links.md} | 3 +-- .../{crawling/relative_urls.md => 15_relative_urls.md} | 3 +-- .../{crawling/first_crawl.md => 16_first_crawl.md} | 3 +-- .../{crawling/scraping_the_data.md => 
17_scraping_the_data.md} | 3 +-- .../{crawling/pro_scraping.md => 18_pro_scraping.md} | 3 +-- .../{crawling/headless_browser.md => 19_headless_browser.md} | 3 +-- .../{crawling/exporting_data.md => 20_exporting_data.md} | 3 +-- .../{challenge/index.md => 21_challenge.md} | 1 - ...ing_and_setting_up.md => 22_initializing_and_setting_up.md} | 3 +-- .../{challenge/modularity.md => 23_modularity.md} | 3 +-- .../{challenge/scraping_amazon.md => 24_scraping_amazon.md} | 3 +-- .../{best_practices.md => 25_best_practices.md} | 1 - .../scraping_basics_javascript/{crawling => }/finding_links.js | 0 26 files changed, 12 insertions(+), 37 deletions(-) rename sources/academy/webscraping/scraping_basics_javascript/{introduction.md => 01_introduction.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction/index.md => 02_data_extraction.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction/browser_devtools.md => 03_browser_devtools.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction/using_devtools.md => 04_using_devtools.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction/devtools_continued.md => 05_devtools_continued.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction/computer_preparation.md => 06_computer_preparation.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction/project_setup.md => 07_project_setup.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction/node_js_scraper.md => 08_node_js_scraper.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction/node_continued.md => 09_node_continued.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{data_extraction/save_to_csv.md => 10_save_to_csv.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/index.md 
=> 11_crawling.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/recap_extraction_basics.md => 12_recap_extraction_basics.md} (98%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/finding_links.md => 13_finding_links.md} (98%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/filtering_links.md => 14_filtering_links.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/relative_urls.md => 15_relative_urls.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/first_crawl.md => 16_first_crawl.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/scraping_the_data.md => 17_scraping_the_data.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/pro_scraping.md => 18_pro_scraping.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/headless_browser.md => 19_headless_browser.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling/exporting_data.md => 20_exporting_data.md} (98%) rename sources/academy/webscraping/scraping_basics_javascript/{challenge/index.md => 21_challenge.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{challenge/initializing_and_setting_up.md => 22_initializing_and_setting_up.md} (98%) rename sources/academy/webscraping/scraping_basics_javascript/{challenge/modularity.md => 23_modularity.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{challenge/scraping_amazon.md => 24_scraping_amazon.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{best_practices.md => 25_best_practices.md} (99%) rename sources/academy/webscraping/scraping_basics_javascript/{crawling => }/finding_links.js (100%) diff --git a/sources/academy/webscraping/scraping_basics_javascript/introduction.md b/sources/academy/webscraping/scraping_basics_javascript/01_introduction.md 
similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/introduction.md rename to sources/academy/webscraping/scraping_basics_javascript/01_introduction.md index aff6571d1..6aa287129 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/introduction.md +++ b/sources/academy/webscraping/scraping_basics_javascript/01_introduction.md @@ -1,7 +1,6 @@ --- title: Introduction description: Start learning about web scraping, web crawling, data extraction, and popular tools to start developing your own scraper. -sidebar_position: 1.1 category: courses slug: /web-scraping-for-beginners/introduction --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/index.md b/sources/academy/webscraping/scraping_basics_javascript/02_data_extraction.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/index.md rename to sources/academy/webscraping/scraping_basics_javascript/02_data_extraction.md index 0482b5eb3..a8156e159 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/index.md +++ b/sources/academy/webscraping/scraping_basics_javascript/02_data_extraction.md @@ -1,7 +1,6 @@ --- title: Basics of data extraction description: Learn about HTML, CSS, and JavaScript, the basic building blocks of a website, and how to use them in web scraping and data extraction. 
-sidebar_position: 1.2 category: courses slug: /web-scraping-for-beginners/data-extraction --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/browser_devtools.md b/sources/academy/webscraping/scraping_basics_javascript/03_browser_devtools.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/browser_devtools.md rename to sources/academy/webscraping/scraping_basics_javascript/03_browser_devtools.md index e4d24df9b..0cb5cda39 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/browser_devtools.md +++ b/sources/academy/webscraping/scraping_basics_javascript/03_browser_devtools.md @@ -1,7 +1,6 @@ --- title: Starting with browser DevTools description: Learn about browser DevTools, a valuable tool in the world of web scraping, and how you can use them to extract data from a website. -sidebar_position: 1 slug: /web-scraping-for-beginners/data-extraction/browser-devtools --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/using_devtools.md b/sources/academy/webscraping/scraping_basics_javascript/04_using_devtools.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/using_devtools.md rename to sources/academy/webscraping/scraping_basics_javascript/04_using_devtools.md index d48663824..1c3dde760 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/using_devtools.md +++ b/sources/academy/webscraping/scraping_basics_javascript/04_using_devtools.md @@ -1,7 +1,6 @@ --- title: Finding elements with DevTools description: Learn how to use browser DevTools, CSS selectors, and JavaScript via the DevTools console to extract data from a website. 
-sidebar_position: 2 slug: /web-scraping-for-beginners/data-extraction/using-devtools --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/devtools_continued.md b/sources/academy/webscraping/scraping_basics_javascript/05_devtools_continued.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/devtools_continued.md rename to sources/academy/webscraping/scraping_basics_javascript/05_devtools_continued.md index 79278386a..84f37e427 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/devtools_continued.md +++ b/sources/academy/webscraping/scraping_basics_javascript/05_devtools_continued.md @@ -1,7 +1,6 @@ --- title: Extracting data with DevTools description: Continue learning how to extract data from a website using browser DevTools, CSS selectors, and JavaScript via the DevTools console. -sidebar_position: 3 slug: /web-scraping-for-beginners/data-extraction/devtools-continued --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/computer_preparation.md b/sources/academy/webscraping/scraping_basics_javascript/06_computer_preparation.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/computer_preparation.md rename to sources/academy/webscraping/scraping_basics_javascript/06_computer_preparation.md index c4b9baf78..49a42e38f 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/computer_preparation.md +++ b/sources/academy/webscraping/scraping_basics_javascript/06_computer_preparation.md @@ -1,7 +1,6 @@ --- title: Computer preparation description: Set up your computer to be able to code scrapers with Node.js and JavaScript. Download Node.js and npm and run a Hello World script. 
-sidebar_position: 4 slug: /web-scraping-for-beginners/data-extraction/computer-preparation --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/project_setup.md b/sources/academy/webscraping/scraping_basics_javascript/07_project_setup.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/project_setup.md rename to sources/academy/webscraping/scraping_basics_javascript/07_project_setup.md index 72b146a40..39f3d47e3 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/project_setup.md +++ b/sources/academy/webscraping/scraping_basics_javascript/07_project_setup.md @@ -1,7 +1,6 @@ --- title: Project setup description: Create a new project with npm and Node.js. Install necessary libraries, and test that everything works before starting the next lesson. -sidebar_position: 5 slug: /web-scraping-for-beginners/data-extraction/project-setup --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_js_scraper.md b/sources/academy/webscraping/scraping_basics_javascript/08_node_js_scraper.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_js_scraper.md rename to sources/academy/webscraping/scraping_basics_javascript/08_node_js_scraper.md index 746cd7103..8146b2407 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_js_scraper.md +++ b/sources/academy/webscraping/scraping_basics_javascript/08_node_js_scraper.md @@ -1,7 +1,6 @@ --- title: Scraping with Node.js description: Learn how to use JavaScript and Node.js to create a web scraper, plus take advantage of the Cheerio and Got-scraping libraries to make your job easier. 
-sidebar_position: 6 slug: /web-scraping-for-beginners/data-extraction/node-js-scraper --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_continued.md b/sources/academy/webscraping/scraping_basics_javascript/09_node_continued.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_continued.md rename to sources/academy/webscraping/scraping_basics_javascript/09_node_continued.md index 1fdb51e7e..656c4e3de 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_continued.md +++ b/sources/academy/webscraping/scraping_basics_javascript/09_node_continued.md @@ -1,7 +1,6 @@ --- title: Extracting data with Node.js description: Continue learning how to create a web scraper with Node.js and Cheerio. Learn how to parse HTML and print the results of the data your scraper has collected. -sidebar_position: 7 slug: /web-scraping-for-beginners/data-extraction/node-continued --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/save_to_csv.md b/sources/academy/webscraping/scraping_basics_javascript/10_save_to_csv.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/save_to_csv.md rename to sources/academy/webscraping/scraping_basics_javascript/10_save_to_csv.md index b6ec1b7df..95ed44392 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/save_to_csv.md +++ b/sources/academy/webscraping/scraping_basics_javascript/10_save_to_csv.md @@ -1,7 +1,6 @@ --- title: Saving results to CSV description: Learn how to save the results of your scraper's collected data to a CSV file that can be opened in Excel, Google Sheets, or any other spreadsheets program. 
-sidebar_position: 8 slug: /web-scraping-for-beginners/data-extraction/save-to-csv --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/index.md b/sources/academy/webscraping/scraping_basics_javascript/11_crawling.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/index.md rename to sources/academy/webscraping/scraping_basics_javascript/11_crawling.md index 14ba32761..b3fce6c37 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/index.md +++ b/sources/academy/webscraping/scraping_basics_javascript/11_crawling.md @@ -1,7 +1,6 @@ --- title: Basics of crawling description: Learn how to crawl the web with your scraper. How to extract links and URLs from web pages and how to manage the collected links to visit new pages. -sidebar_position: 1.3 category: courses slug: /web-scraping-for-beginners/crawling --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/recap_extraction_basics.md b/sources/academy/webscraping/scraping_basics_javascript/12_recap_extraction_basics.md similarity index 98% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/recap_extraction_basics.md rename to sources/academy/webscraping/scraping_basics_javascript/12_recap_extraction_basics.md index cdeea8cd5..f30735888 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/recap_extraction_basics.md +++ b/sources/academy/webscraping/scraping_basics_javascript/12_recap_extraction_basics.md @@ -1,7 +1,6 @@ --- -title: Recap - Data extraction +title: "Crawling: Recap - Data extraction" description: Review our e-commerce website scraper and refresh our memory about its code and the programming techniques we used to extract and save the data. 
-sidebar_position: 1 slug: /web-scraping-for-beginners/crawling/recap-extraction-basics --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/finding_links.md b/sources/academy/webscraping/scraping_basics_javascript/13_finding_links.md similarity index 98% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/finding_links.md rename to sources/academy/webscraping/scraping_basics_javascript/13_finding_links.md index 785d9396a..af317bf03 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/finding_links.md +++ b/sources/academy/webscraping/scraping_basics_javascript/13_finding_links.md @@ -1,7 +1,6 @@ --- -title: Finding links +title: "Crawling: Finding links" description: Learn what a link looks like in HTML and how to find and extract their URLs when web scraping. Using both DevTools and Node.js. -sidebar_position: 2 slug: /web-scraping-for-beginners/crawling/finding-links --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/filtering_links.md b/sources/academy/webscraping/scraping_basics_javascript/14_filtering_links.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/filtering_links.md rename to sources/academy/webscraping/scraping_basics_javascript/14_filtering_links.md index 34d4961aa..8ced3acbd 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/filtering_links.md +++ b/sources/academy/webscraping/scraping_basics_javascript/14_filtering_links.md @@ -1,7 +1,6 @@ --- -title: Filtering links +title: "Crawling: Filtering links" description: When you extract links from a web page, you often end up with a lot of irrelevant URLs. Learn how to filter the links to only keep the ones you need. 
-sidebar_position: 3 slug: /web-scraping-for-beginners/crawling/filtering-links --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/relative_urls.md b/sources/academy/webscraping/scraping_basics_javascript/15_relative_urls.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/relative_urls.md rename to sources/academy/webscraping/scraping_basics_javascript/15_relative_urls.md index f9487c80a..d81a894cd 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/relative_urls.md +++ b/sources/academy/webscraping/scraping_basics_javascript/15_relative_urls.md @@ -1,7 +1,6 @@ --- -title: Relative URLs +title: "Crawling: Relative URLs" description: Learn about absolute and relative URLs used on web pages and how to work with them when parsing HTML with Cheerio in your scraper. -sidebar_position: 4 slug: /web-scraping-for-beginners/crawling/relative-urls --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/first_crawl.md b/sources/academy/webscraping/scraping_basics_javascript/16_first_crawl.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/first_crawl.md rename to sources/academy/webscraping/scraping_basics_javascript/16_first_crawl.md index 432d06f64..6a9d12cf1 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/first_crawl.md +++ b/sources/academy/webscraping/scraping_basics_javascript/16_first_crawl.md @@ -1,7 +1,6 @@ --- -title: Your first crawl +title: "Crawling: Your first crawl" description: Learn how to crawl the web using Node.js, Cheerio and an HTTP client. Extract URLs from pages and use them to visit more websites. 
-sidebar_position: 5 slug: /web-scraping-for-beginners/crawling/first-crawl --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/scraping_the_data.md b/sources/academy/webscraping/scraping_basics_javascript/17_scraping_the_data.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/scraping_the_data.md rename to sources/academy/webscraping/scraping_basics_javascript/17_scraping_the_data.md index 734c637d6..088f27262 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/scraping_the_data.md +++ b/sources/academy/webscraping/scraping_basics_javascript/17_scraping_the_data.md @@ -1,7 +1,6 @@ --- -title: Scraping data +title: "Crawling: Scraping data" description: Learn how to add data extraction logic to your crawler, which will allow you to extract data from all the websites you crawled. -sidebar_position: 6 slug: /web-scraping-for-beginners/crawling/scraping-the-data --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/pro_scraping.md b/sources/academy/webscraping/scraping_basics_javascript/18_pro_scraping.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/pro_scraping.md rename to sources/academy/webscraping/scraping_basics_javascript/18_pro_scraping.md index b4b161641..13065cbd6 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/pro_scraping.md +++ b/sources/academy/webscraping/scraping_basics_javascript/18_pro_scraping.md @@ -1,7 +1,6 @@ --- -title: Professional scraping +title: "Crawling: Professional scraping" description: Learn how to build scrapers quicker and get better and more robust results by using Crawlee, an open-source library for scraping in Node.js. 
-sidebar_position: 7 slug: /web-scraping-for-beginners/crawling/pro-scraping --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/headless_browser.md b/sources/academy/webscraping/scraping_basics_javascript/19_headless_browser.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/headless_browser.md rename to sources/academy/webscraping/scraping_basics_javascript/19_headless_browser.md index b57a81064..d9e3157d0 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/headless_browser.md +++ b/sources/academy/webscraping/scraping_basics_javascript/19_headless_browser.md @@ -1,7 +1,6 @@ --- -title: Headless browsers +title: "Crawling: Headless browsers" description: Learn how to scrape the web with a headless browser using only a few lines of code. Chrome, Firefox, Safari, Edge - all are supported. -sidebar_position: 8 slug: /web-scraping-for-beginners/crawling/headless-browser --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/exporting_data.md b/sources/academy/webscraping/scraping_basics_javascript/20_exporting_data.md similarity index 98% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/exporting_data.md rename to sources/academy/webscraping/scraping_basics_javascript/20_exporting_data.md index d0d4baad8..ccd2200aa 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/crawling/exporting_data.md +++ b/sources/academy/webscraping/scraping_basics_javascript/20_exporting_data.md @@ -1,7 +1,6 @@ --- -title: Exporting data +title: "Crawling: Exporting data" description: Learn how to export the data you scraped using Crawlee to CSV or JSON. 
-sidebar_position: 9 slug: /web-scraping-for-beginners/crawling/exporting-data --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/index.md b/sources/academy/webscraping/scraping_basics_javascript/21_challenge.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/index.md rename to sources/academy/webscraping/scraping_basics_javascript/21_challenge.md index 301501de8..0e478a9fa 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/challenge/index.md +++ b/sources/academy/webscraping/scraping_basics_javascript/21_challenge.md @@ -1,7 +1,6 @@ --- title: Challenge description: Test your knowledge acquired in the previous sections of this course by building an Amazon scraper using Crawlee's CheerioCrawler! -sidebar_position: 1.4 slug: /web-scraping-for-beginners/challenge --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/initializing_and_setting_up.md b/sources/academy/webscraping/scraping_basics_javascript/22_initializing_and_setting_up.md similarity index 98% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/initializing_and_setting_up.md rename to sources/academy/webscraping/scraping_basics_javascript/22_initializing_and_setting_up.md index c0cf40bc1..21a43e0dd 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/challenge/initializing_and_setting_up.md +++ b/sources/academy/webscraping/scraping_basics_javascript/22_initializing_and_setting_up.md @@ -1,7 +1,6 @@ --- -title: Initializing & setting up +title: "Challenge: Initializing & setting up" description: When you extract links from a web page, you often end up with a lot of irrelevant URLs. Learn how to filter the links to only keep the ones you need. 
-sidebar_position: 1 slug: /web-scraping-for-beginners/challenge/initializing-and-setting-up --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/modularity.md b/sources/academy/webscraping/scraping_basics_javascript/23_modularity.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/modularity.md rename to sources/academy/webscraping/scraping_basics_javascript/23_modularity.md index e6d62c7b3..120584fc1 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/challenge/modularity.md +++ b/sources/academy/webscraping/scraping_basics_javascript/23_modularity.md @@ -1,7 +1,6 @@ --- -title: Modularity +title: "Challenge: Modularity" description: Before you build your first web scraper with Crawlee, it is important to understand the concept of modularity in programming. -sidebar_position: 2 slug: /web-scraping-for-beginners/challenge/modularity --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/scraping_amazon.md b/sources/academy/webscraping/scraping_basics_javascript/24_scraping_amazon.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/scraping_amazon.md rename to sources/academy/webscraping/scraping_basics_javascript/24_scraping_amazon.md index fa8291593..b1b52a3fb 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/challenge/scraping_amazon.md +++ b/sources/academy/webscraping/scraping_basics_javascript/24_scraping_amazon.md @@ -1,7 +1,6 @@ --- -title: Scraping Amazon +title: "Challenge: Scraping Amazon" description: Before you build your first web scraper with Crawlee, it is important to understand the concept of modularity in programming. 
-sidebar_position: 4 slug: /web-scraping-for-beginners/challenge/scraping-amazon --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/best_practices.md b/sources/academy/webscraping/scraping_basics_javascript/25_best_practices.md similarity index 99% rename from sources/academy/webscraping/scraping_basics_javascript/best_practices.md rename to sources/academy/webscraping/scraping_basics_javascript/25_best_practices.md index b3e1540cc..80aee9bb8 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/best_practices.md +++ b/sources/academy/webscraping/scraping_basics_javascript/25_best_practices.md @@ -1,7 +1,6 @@ --- title: Best practices description: Understand the standards and best practices that we here at Apify abide by to write readable, scalable, and maintainable code. -sidebar_position: 1.5 slug: /web-scraping-for-beginners/best-practices --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/finding_links.js b/sources/academy/webscraping/scraping_basics_javascript/finding_links.js similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/finding_links.js rename to sources/academy/webscraping/scraping_basics_javascript/finding_links.js From ba6925d1e79db57b6ec1c5450432835e41ed931d Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Tue, 13 May 2025 14:39:31 +0200 Subject: [PATCH 3/5] refactor: change links to point to correct places --- sources/academy/glossary/tools/apify_cli.md | 2 +- .../expert_scraping_with_apify/actors_webhooks.md | 2 +- .../platform/expert_scraping_with_apify/index.md | 2 +- .../node_js/analyzing_pages_and_fixing_errors.md | 2 +- .../tutorials/node_js/dealing_with_dynamic_pages.md | 2 +- .../anti_scraping/mitigation/using_proxies.md | 4 ++-- .../executing_scripts/extracting_data.md | 2 +- .../webscraping/puppeteer_playwright/index.md | 2 +- .../page/interacting_with_a_page.md | 2 +- .../scraping_basics_javascript/01_introduction.md | 2 +- 
.../scraping_basics_javascript/02_data_extraction.md | 2 +- .../03_browser_devtools.md | 4 ++-- .../scraping_basics_javascript/04_using_devtools.md | 8 ++++---- .../05_devtools_continued.md | 2 +- .../06_computer_preparation.md | 2 +- .../scraping_basics_javascript/07_project_setup.md | 2 +- .../scraping_basics_javascript/08_node_js_scraper.md | 6 +++--- .../scraping_basics_javascript/09_node_continued.md | 4 ++-- .../scraping_basics_javascript/10_save_to_csv.md | 2 +- .../scraping_basics_javascript/11_crawling.md | 6 +++--- .../12_recap_extraction_basics.md | 8 ++++---- .../scraping_basics_javascript/13_finding_links.md | 4 ++-- .../scraping_basics_javascript/14_filtering_links.md | 2 +- .../scraping_basics_javascript/15_relative_urls.md | 4 ++-- .../scraping_basics_javascript/16_first_crawl.md | 8 ++++---- .../17_scraping_the_data.md | 10 +++++----- .../scraping_basics_javascript/18_pro_scraping.md | 4 ++-- .../19_headless_browser.md | 4 ++-- .../scraping_basics_javascript/20_exporting_data.md | 2 +- .../scraping_basics_javascript/21_challenge.md | 8 ++++---- .../22_initializing_and_setting_up.md | 2 +- .../scraping_basics_javascript/23_modularity.md | 6 +++--- .../scraping_basics_javascript/24_scraping_amazon.md | 4 ++-- .../webscraping/scraping_basics_javascript/index.md | 12 ++++++------ 34 files changed, 69 insertions(+), 69 deletions(-) diff --git a/sources/academy/glossary/tools/apify_cli.md b/sources/academy/glossary/tools/apify_cli.md index 82cb187e7..927475758 100644 --- a/sources/academy/glossary/tools/apify_cli.md +++ b/sources/academy/glossary/tools/apify_cli.md @@ -15,7 +15,7 @@ The [Apify CLI](/cli) helps you create, develop, build and run Apify Actors, and ## Installing {#installing} -To install the Apfiy CLI, you'll first need npm, which comes preinstalled with Node.js. If you haven't yet installed Node, learn how to do that [here](../../webscraping/scraping_basics_javascript/data_extraction/computer_preparation.md). 
Additionally, make sure you've got an Apify account, as you will need to log in to the CLI to gain access to its full potential. +To install the Apify CLI, you'll first need npm, which comes preinstalled with Node.js. If you haven't yet installed Node, learn how to do that [here](../../webscraping/scraping_basics_javascript/06_computer_preparation.md). Additionally, make sure you've got an Apify account, as you will need to log in to the CLI to gain access to its full potential. Open up a terminal instance and run the following command: diff --git a/sources/academy/platform/expert_scraping_with_apify/actors_webhooks.md b/sources/academy/platform/expert_scraping_with_apify/actors_webhooks.md index 53814c003..5d977ab83 100644 --- a/sources/academy/platform/expert_scraping_with_apify/actors_webhooks.md +++ b/sources/academy/platform/expert_scraping_with_apify/actors_webhooks.md @@ -15,7 +15,7 @@ Thus far, you've run Actors on the platform and written an Actor of your own, wh ## Advanced Actor overview {#advanced-actors} -In this course, we'll be working out of the Amazon scraper project from the **Web scraping basics for JavaScript devs** course. If you haven't already built that project, you can do it in three short lessons [here](../../webscraping/scraping_basics_javascript/challenge/index.md). We've made a few small modifications to the project with the Apify SDK, but 99% of the code is still the same. +In this course, we'll be working out of the Amazon scraper project from the **Web scraping basics for JavaScript devs** course. If you haven't already built that project, you can do it in three short lessons [here](../../webscraping/scraping_basics_javascript/21_challenge.md). We've made a few small modifications to the project with the Apify SDK, but 99% of the code is still the same. Take another look at the files within your Amazon scraper project. You'll notice that there is a **Dockerfile**. 
Every single Actor has a Dockerfile (the Actor's **Image**) which tells Docker how to spin up a container on the Apify platform which can successfully run the Actor's code. "Apify Actors" is a serverless platform that runs multiple Docker containers. For a deeper understanding of Actor Dockerfiles, refer to the [Apify Actor Dockerfile docs](/sdk/js/docs/guides/docker-images#example-dockerfile). diff --git a/sources/academy/platform/expert_scraping_with_apify/index.md b/sources/academy/platform/expert_scraping_with_apify/index.md index caa49d0ef..6671ed457 100644 --- a/sources/academy/platform/expert_scraping_with_apify/index.md +++ b/sources/academy/platform/expert_scraping_with_apify/index.md @@ -26,7 +26,7 @@ Before developing a pro-level Apify scraper, there are some important things you ### Crawlee, Apify SDK, and the Apify CLI {#crawlee-apify-sdk-and-cli} -If you're feeling ambitious, you don't need to have any prior experience with Crawlee to get started with this course; however, at least 5–10 minutes of exposure is recommended. If you haven't yet tried out Crawlee, you can refer to [this lesson](../../webscraping/scraping_basics_javascript/crawling/pro_scraping.md) in the **Web scraping basics for JavaScript devs** course (and ideally follow along). To familiarize yourself with the Apify SDK, you can refer to the [Apify Platform](../apify_platform.md) category. +If you're feeling ambitious, you don't need to have any prior experience with Crawlee to get started with this course; however, at least 5–10 minutes of exposure is recommended. If you haven't yet tried out Crawlee, you can refer to [this lesson](../../webscraping/scraping_basics_javascript/18_pro_scraping.md) in the **Web scraping basics for JavaScript devs** course (and ideally follow along). To familiarize yourself with the Apify SDK, you can refer to the [Apify Platform](../apify_platform.md) category. 
The Apify CLI will play a core role in the running and testing of the Actor you will build, so if you haven't gotten it installed already, please refer to [this short lesson](../../glossary/tools/apify_cli.md). diff --git a/sources/academy/tutorials/node_js/analyzing_pages_and_fixing_errors.md b/sources/academy/tutorials/node_js/analyzing_pages_and_fixing_errors.md index 892a3dd59..b44357652 100644 --- a/sources/academy/tutorials/node_js/analyzing_pages_and_fixing_errors.md +++ b/sources/academy/tutorials/node_js/analyzing_pages_and_fixing_errors.md @@ -71,7 +71,7 @@ try { } ``` -Read more information about logging and error handling in our developer [best practices](../../webscraping/scraping_basics_javascript/best_practices.md) section. +Read more information about logging and error handling in our developer [best practices](../../webscraping/scraping_basics_javascript/25_best_practices.md) section. ### Saving snapshots {#saving-snapshots} diff --git a/sources/academy/tutorials/node_js/dealing_with_dynamic_pages.md b/sources/academy/tutorials/node_js/dealing_with_dynamic_pages.md index 21b8aee9a..e8cc272db 100644 --- a/sources/academy/tutorials/node_js/dealing_with_dynamic_pages.md +++ b/sources/academy/tutorials/node_js/dealing_with_dynamic_pages.md @@ -43,7 +43,7 @@ If you're in a brand new project, don't forget to initialize your project, then npm init -y && npm i crawlee ``` -Now, let's write some data extraction code to extract each product's data. This should look familiar if you went through the [Data Extraction](../../webscraping/scraping_basics_javascript/data_extraction/index.md) lessons: +Now, let's write some data extraction code to extract each product's data. 
This should look familiar if you went through the [Data Extraction](../../webscraping/scraping_basics_javascript/02_data_extraction.md) lessons: ```js import { CheerioCrawler } from 'crawlee'; diff --git a/sources/academy/webscraping/anti_scraping/mitigation/using_proxies.md b/sources/academy/webscraping/anti_scraping/mitigation/using_proxies.md index 819a50c6f..2e4aeeb8c 100644 --- a/sources/academy/webscraping/anti_scraping/mitigation/using_proxies.md +++ b/sources/academy/webscraping/anti_scraping/mitigation/using_proxies.md @@ -11,13 +11,13 @@ slug: /anti-scraping/mitigation/using-proxies --- -In the [**Web scraping basics for JavaScript devs**](../../scraping_basics_javascript/crawling/pro_scraping.md) course, we learned about the power of Crawlee, and how it can streamline the development process of web crawlers. You've already seen how powerful the `crawlee` package is; however, what you've been exposed to thus far is only the tip of the iceberg. +In the [**Web scraping basics for JavaScript devs**](../../scraping_basics_javascript/18_pro_scraping.md) course, we learned about the power of Crawlee, and how it can streamline the development process of web crawlers. You've already seen how powerful the `crawlee` package is; however, what you've been exposed to thus far is only the tip of the iceberg. Because proxies are so widely used in the scraping world, Crawlee has built-in features for implementing them in an effective way. One of the main functionalities that comes baked into Crawlee is proxy rotation, which is when each request is sent through a different proxy from a proxy pool. ## Implementing proxies in a scraper {#implementing-proxies} -Let's borrow some scraper code from the end of the [pro-scraping](../../scraping_basics_javascript/crawling/pro_scraping.md) lesson in our **Web scraping basics for JavaScript devs** course and paste it into a new file called **proxies.js**. 
This code enqueues all of the product links on [demo-webstore.apify.org](https://demo-webstore.apify.org)'s on-sale page, then makes a request to each product page and scrapes data about each one: +Let's borrow some scraper code from the end of the [pro-scraping](../../scraping_basics_javascript/18_pro_scraping.md) lesson in our **Web scraping basics for JavaScript devs** course and paste it into a new file called **proxies.js**. This code enqueues all of the product links on [demo-webstore.apify.org](https://demo-webstore.apify.org)'s on-sale page, then makes a request to each product page and scrapes data about each one: ```js // crawlee.js diff --git a/sources/academy/webscraping/puppeteer_playwright/executing_scripts/extracting_data.md b/sources/academy/webscraping/puppeteer_playwright/executing_scripts/extracting_data.md index 4fb52aa83..033bcde1f 100644 --- a/sources/academy/webscraping/puppeteer_playwright/executing_scripts/extracting_data.md +++ b/sources/academy/webscraping/puppeteer_playwright/executing_scripts/extracting_data.md @@ -14,7 +14,7 @@ import TabItem from '@theme/TabItem'; --- -Now that we know how to execute scripts on a page, we're ready to learn a bit about [data extraction](../../scraping_basics_javascript/data_extraction/index.md). In this lesson, we'll be scraping all the on-sale products from our [Fakestore](https://demo-webstore.apify.org/search/on-sale) website. Playwright & Puppeteer offer two main methods for data extraction: +Now that we know how to execute scripts on a page, we're ready to learn a bit about [data extraction](../../scraping_basics_javascript/02_data_extraction.md). In this lesson, we'll be scraping all the on-sale products from our [Fakestore](https://demo-webstore.apify.org/search/on-sale) website. Playwright & Puppeteer offer two main methods for data extraction: 1. Directly in `page.evaluate()` and other evaluate functions such as `page.$$eval()`. 2. 
In the Node.js context using a parsing library such as [Cheerio](https://www.npmjs.com/package/cheerio) diff --git a/sources/academy/webscraping/puppeteer_playwright/index.md b/sources/academy/webscraping/puppeteer_playwright/index.md index 77f878199..3e7a69169 100644 --- a/sources/academy/webscraping/puppeteer_playwright/index.md +++ b/sources/academy/webscraping/puppeteer_playwright/index.md @@ -63,7 +63,7 @@ npm install puppeteer -> For a more in-depth guide on how to set up the basic environment we'll be using in this tutorial, check out the [**Computer preparation**](../scraping_basics_javascript/data_extraction/computer_preparation.md) lesson in the **Web scraping basics for JavaScript devs** course +> For a more in-depth guide on how to set up the basic environment we'll be using in this tutorial, check out the [**Computer preparation**](../scraping_basics_javascript/06_computer_preparation.md) lesson in the **Web scraping basics for JavaScript devs** course. ## Course overview {#course-overview} diff --git a/sources/academy/webscraping/puppeteer_playwright/page/interacting_with_a_page.md b/sources/academy/webscraping/puppeteer_playwright/page/interacting_with_a_page.md index ec1c5d0db..24f7af3b6 100644 --- a/sources/academy/webscraping/puppeteer_playwright/page/interacting_with_a_page.md +++ b/sources/academy/webscraping/puppeteer_playwright/page/interacting_with_a_page.md @@ -55,7 +55,7 @@ With `page.click()`, Puppeteer and Playwright actually drag the mouse and click, Notice that in the Playwright example, we are using a different selector than in the Puppeteer example. This is because Playwright supports [many custom CSS selectors](https://playwright.dev/docs/other-locators#css-elements-matching-one-of-the-conditions), such as the **has-text** pseudo class. As a rule of thumb, using text selectors is much more preferable to using regular selectors, as they are much less likely to break. If Google makes the sibling above the **Accept all** button a `
` element instead of a `