From fe187a6aa643373c5b946c2cece046d14e499a20 Mon Sep 17 00:00:00 2001 From: Benjamin Parsons Date: Thu, 10 Sep 2020 08:51:28 -0600 Subject: [PATCH 1/8] Add files via upload --- Code/Merging all csv files in a dir.ipynb | 218 ++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 Code/Merging all csv files in a dir.ipynb diff --git a/Code/Merging all csv files in a dir.ipynb b/Code/Merging all csv files in a dir.ipynb new file mode 100644 index 0000000..eea537b --- /dev/null +++ b/Code/Merging all csv files in a dir.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Merging all CSV Files in a Directory into Single CSV Containing All Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### lets first see what csv files are in the directory.." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sales_December_2019.csv\n", + "Sales_April_2019.csv\n", + "Sales_February_2019.csv\n", + "Sales_March_2019.csv\n", + "Sales_August_2019.csv\n", + "Sales_May_2019.csv\n", + "Sales_November_2019.csv\n", + "Sales_October_2019.csv\n", + "Sales_January_2019.csv\n", + "Sales_September_2019.csv\n", + "Sales_July_2019.csv\n", + "Sales_June_2019.csv\n" + ] + } + ], + "source": [ + "files=[f for f in os.listdir(\"./SalesAnalysis/Sales_Data\") if f.endswith('.csv')]\n", + "all_data = pd.DataFrame()\n", + "\n", + "for file in files:\n", + " print(file)\n", + " #df = pd.read_csv('./csse_covid_19_daily_reports_us/' + file)\n", + " #all_data = pd.concat([all_data, df])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ^ We just looked inside the directory and printed each file. 
Now, lets merge them into one single csv file containing all of our data.." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Order IDProductQuantity OrderedPrice EachOrder DatePurchase Address
0295665Macbook Pro Laptop1170012/30/19 00:01136 Church St, New York City, NY 10001
1295666LG Washing Machine1600.012/29/19 07:03562 2nd St, New York City, NY 10001
2295667USB-C Charging Cable111.9512/12/19 18:21277 Main St, New York City, NY 10001
329566827in FHD Monitor1149.9912/22/19 15:13410 6th St, San Francisco, CA 94016
4295669USB-C Charging Cable111.9512/18/19 12:3843 Hill St, Atlanta, GA 30301
\n", + "
" + ], + "text/plain": [ + " Order ID Product Quantity Ordered Price Each Order Date \\\n", + "0 295665 Macbook Pro Laptop 1 1700 12/30/19 00:01 \n", + "1 295666 LG Washing Machine 1 600.0 12/29/19 07:03 \n", + "2 295667 USB-C Charging Cable 1 11.95 12/12/19 18:21 \n", + "3 295668 27in FHD Monitor 1 149.99 12/22/19 15:13 \n", + "4 295669 USB-C Charging Cable 1 11.95 12/18/19 12:38 \n", + "\n", + " Purchase Address \n", + "0 136 Church St, New York City, NY 10001 \n", + "1 562 2nd St, New York City, NY 10001 \n", + "2 277 Main St, New York City, NY 10001 \n", + "3 410 6th St, San Francisco, CA 94016 \n", + "4 43 Hill St, Atlanta, GA 30301 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_data = pd.DataFrame()\n", + "for file in files:\n", + " df = pd.read_csv('././SalesAnalysis/Sales_Data/' + file)\n", + " all_data = pd.concat([all_data, df])\n", + "\n", + "all_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### We just compressed each csv files' contents into a single csv and can now be used for analysis. You can verify that all contents were transferred by saving the new data frame ( in this case 'all_data') as a csv file using pd.to_csv('file_name') and then accessing it. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 7d8fbfcddd42777d05e12962c58b4086d5bb6864 Mon Sep 17 00:00:00 2001 From: Benjamin Parsons Date: Thu, 10 Sep 2020 08:56:53 -0600 Subject: [PATCH 2/8] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 91301b6..60ae833 100644 --- a/README.md +++ b/README.md @@ -199,3 +199,6 @@ Sometimes you would need a functionality which is not directly provided by Keras (i.e. a neural network which takes input from multiple data sources, and does a combined training on this data), and you want that the data generator should be able to handle the data preparation on the fly, you can create a wrapper around ImageDataGenerator class to give the required output.[This notebook](./Code/CustomDataGen_Keras.ipynb) explains a simple solution to this usecase. 2. Another use case could be that you want to resize the images from a shape say 150x150 to a shape 224x224, which is generally utilized by the pretrained models, you can customize the ImageDataGenerator without coding your own data generator from ground up [(Example Notebook)](https://github.com/faizankshaikh/AV_Article_Codes/blob/master/Inception_From_Scratch/improvements/Inception_v1_from_Scratch.ipynb). + +- ### [Data Science Hack #44 Merging All CSV File in a Directory into a Single CSV](./Code/Merging all csv files in a dir.ipynb) +Sometimes all your data is not bundled into a single csv. 
In order to perfrom analysis on your data, all data must be compressed into a single file to make analysis much easier. The link provides the steps regarding the merge. From f9c065171455f60282d647fcfdcecda82ac041eb Mon Sep 17 00:00:00 2001 From: Benjamin Parsons Date: Thu, 10 Sep 2020 09:00:36 -0600 Subject: [PATCH 3/8] Delete Merging all csv files in a dir.ipynb --- Code/Merging all csv files in a dir.ipynb | 218 ---------------------- 1 file changed, 218 deletions(-) delete mode 100644 Code/Merging all csv files in a dir.ipynb diff --git a/Code/Merging all csv files in a dir.ipynb b/Code/Merging all csv files in a dir.ipynb deleted file mode 100644 index eea537b..0000000 --- a/Code/Merging all csv files in a dir.ipynb +++ /dev/null @@ -1,218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Merging all CSV Files in a Directory into Single CSV Containing All Data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### lets first see what csv files are in the directory.." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sales_December_2019.csv\n", - "Sales_April_2019.csv\n", - "Sales_February_2019.csv\n", - "Sales_March_2019.csv\n", - "Sales_August_2019.csv\n", - "Sales_May_2019.csv\n", - "Sales_November_2019.csv\n", - "Sales_October_2019.csv\n", - "Sales_January_2019.csv\n", - "Sales_September_2019.csv\n", - "Sales_July_2019.csv\n", - "Sales_June_2019.csv\n" - ] - } - ], - "source": [ - "files=[f for f in os.listdir(\"./SalesAnalysis/Sales_Data\") if f.endswith('.csv')]\n", - "all_data = pd.DataFrame()\n", - "\n", - "for file in files:\n", - " print(file)\n", - " #df = pd.read_csv('./csse_covid_19_daily_reports_us/' + file)\n", - " #all_data = pd.concat([all_data, df])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### ^ We just looked inside the directory and printed each file. Now, lets merge them into one single csv file containing all of our data.." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Order IDProductQuantity OrderedPrice EachOrder DatePurchase Address
0295665Macbook Pro Laptop1170012/30/19 00:01136 Church St, New York City, NY 10001
1295666LG Washing Machine1600.012/29/19 07:03562 2nd St, New York City, NY 10001
2295667USB-C Charging Cable111.9512/12/19 18:21277 Main St, New York City, NY 10001
329566827in FHD Monitor1149.9912/22/19 15:13410 6th St, San Francisco, CA 94016
4295669USB-C Charging Cable111.9512/18/19 12:3843 Hill St, Atlanta, GA 30301
\n", - "
" - ], - "text/plain": [ - " Order ID Product Quantity Ordered Price Each Order Date \\\n", - "0 295665 Macbook Pro Laptop 1 1700 12/30/19 00:01 \n", - "1 295666 LG Washing Machine 1 600.0 12/29/19 07:03 \n", - "2 295667 USB-C Charging Cable 1 11.95 12/12/19 18:21 \n", - "3 295668 27in FHD Monitor 1 149.99 12/22/19 15:13 \n", - "4 295669 USB-C Charging Cable 1 11.95 12/18/19 12:38 \n", - "\n", - " Purchase Address \n", - "0 136 Church St, New York City, NY 10001 \n", - "1 562 2nd St, New York City, NY 10001 \n", - "2 277 Main St, New York City, NY 10001 \n", - "3 410 6th St, San Francisco, CA 94016 \n", - "4 43 Hill St, Atlanta, GA 30301 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "all_data = pd.DataFrame()\n", - "for file in files:\n", - " df = pd.read_csv('././SalesAnalysis/Sales_Data/' + file)\n", - " all_data = pd.concat([all_data, df])\n", - "\n", - "all_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### We just compressed each csv files' contents into a single csv and can now be used for analysis. You can verify that all contents were transferred by saving the new data frame ( in this case 'all_data') as a csv file using pd.to_csv('file_name') and then accessing it. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 6e3e210113fe0d5f95127ad1a63d5a1a1d83e0bd Mon Sep 17 00:00:00 2001 From: Benjamin Parsons Date: Thu, 10 Sep 2020 09:02:08 -0600 Subject: [PATCH 4/8] Add files via upload --- Code/merge_csv_files.ipynb | 218 +++++++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 Code/merge_csv_files.ipynb diff --git a/Code/merge_csv_files.ipynb b/Code/merge_csv_files.ipynb new file mode 100644 index 0000000..eea537b --- /dev/null +++ b/Code/merge_csv_files.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Merging all CSV Files in a Directory into Single CSV Containing All Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### lets first see what csv files are in the directory.." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sales_December_2019.csv\n", + "Sales_April_2019.csv\n", + "Sales_February_2019.csv\n", + "Sales_March_2019.csv\n", + "Sales_August_2019.csv\n", + "Sales_May_2019.csv\n", + "Sales_November_2019.csv\n", + "Sales_October_2019.csv\n", + "Sales_January_2019.csv\n", + "Sales_September_2019.csv\n", + "Sales_July_2019.csv\n", + "Sales_June_2019.csv\n" + ] + } + ], + "source": [ + "files=[f for f in os.listdir(\"./SalesAnalysis/Sales_Data\") if f.endswith('.csv')]\n", + "all_data = pd.DataFrame()\n", + "\n", + "for file in files:\n", + " print(file)\n", + " #df = pd.read_csv('./csse_covid_19_daily_reports_us/' + file)\n", + " #all_data = pd.concat([all_data, df])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ^ We just looked inside the directory and printed each file name. Now, let's merge them into a single DataFrame containing all of our data." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Order IDProductQuantity OrderedPrice EachOrder DatePurchase Address
0295665Macbook Pro Laptop1170012/30/19 00:01136 Church St, New York City, NY 10001
1295666LG Washing Machine1600.012/29/19 07:03562 2nd St, New York City, NY 10001
2295667USB-C Charging Cable111.9512/12/19 18:21277 Main St, New York City, NY 10001
329566827in FHD Monitor1149.9912/22/19 15:13410 6th St, San Francisco, CA 94016
4295669USB-C Charging Cable111.9512/18/19 12:3843 Hill St, Atlanta, GA 30301
\n", + "
" + ], + "text/plain": [ + " Order ID Product Quantity Ordered Price Each Order Date \\\n", + "0 295665 Macbook Pro Laptop 1 1700 12/30/19 00:01 \n", + "1 295666 LG Washing Machine 1 600.0 12/29/19 07:03 \n", + "2 295667 USB-C Charging Cable 1 11.95 12/12/19 18:21 \n", + "3 295668 27in FHD Monitor 1 149.99 12/22/19 15:13 \n", + "4 295669 USB-C Charging Cable 1 11.95 12/18/19 12:38 \n", + "\n", + " Purchase Address \n", + "0 136 Church St, New York City, NY 10001 \n", + "1 562 2nd St, New York City, NY 10001 \n", + "2 277 Main St, New York City, NY 10001 \n", + "3 410 6th St, San Francisco, CA 94016 \n", + "4 43 Hill St, Atlanta, GA 30301 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_data = pd.DataFrame()\n", + "for file in files:\n", + " df = pd.read_csv('././SalesAnalysis/Sales_Data/' + file)\n", + " all_data = pd.concat([all_data, df])\n", + "\n", + "all_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### We just combined the contents of every csv file into a single DataFrame, which can now be used for analysis. You can verify that all contents were transferred by saving the new DataFrame (in this case 'all_data') as a csv file using all_data.to_csv('file_name.csv', index=False) and then opening it. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From de87671b4d89d69e6ac462cf9db199ae61cc2f92 Mon Sep 17 00:00:00 2001 From: Benjamin Parsons Date: Thu, 10 Sep 2020 09:02:22 -0600 Subject: [PATCH 5/8] Add files via upload --- merge_csv_files.ipynb | 218 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 merge_csv_files.ipynb diff --git a/merge_csv_files.ipynb b/merge_csv_files.ipynb new file mode 100644 index 0000000..eea537b --- /dev/null +++ b/merge_csv_files.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Merging all CSV Files in a Directory into Single CSV Containing All Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### lets first see what csv files are in the directory.." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sales_December_2019.csv\n", + "Sales_April_2019.csv\n", + "Sales_February_2019.csv\n", + "Sales_March_2019.csv\n", + "Sales_August_2019.csv\n", + "Sales_May_2019.csv\n", + "Sales_November_2019.csv\n", + "Sales_October_2019.csv\n", + "Sales_January_2019.csv\n", + "Sales_September_2019.csv\n", + "Sales_July_2019.csv\n", + "Sales_June_2019.csv\n" + ] + } + ], + "source": [ + "files=[f for f in os.listdir(\"./SalesAnalysis/Sales_Data\") if f.endswith('.csv')]\n", + "all_data = pd.DataFrame()\n", + "\n", + "for file in files:\n", + " print(file)\n", + " #df = pd.read_csv('./csse_covid_19_daily_reports_us/' + file)\n", + " #all_data = pd.concat([all_data, df])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ^ We just looked inside the directory and printed each file. Now, lets merge them into one single csv file containing all of our data.." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Order IDProductQuantity OrderedPrice EachOrder DatePurchase Address
0295665Macbook Pro Laptop1170012/30/19 00:01136 Church St, New York City, NY 10001
1295666LG Washing Machine1600.012/29/19 07:03562 2nd St, New York City, NY 10001
2295667USB-C Charging Cable111.9512/12/19 18:21277 Main St, New York City, NY 10001
329566827in FHD Monitor1149.9912/22/19 15:13410 6th St, San Francisco, CA 94016
4295669USB-C Charging Cable111.9512/18/19 12:3843 Hill St, Atlanta, GA 30301
\n", + "
" + ], + "text/plain": [ + " Order ID Product Quantity Ordered Price Each Order Date \\\n", + "0 295665 Macbook Pro Laptop 1 1700 12/30/19 00:01 \n", + "1 295666 LG Washing Machine 1 600.0 12/29/19 07:03 \n", + "2 295667 USB-C Charging Cable 1 11.95 12/12/19 18:21 \n", + "3 295668 27in FHD Monitor 1 149.99 12/22/19 15:13 \n", + "4 295669 USB-C Charging Cable 1 11.95 12/18/19 12:38 \n", + "\n", + " Purchase Address \n", + "0 136 Church St, New York City, NY 10001 \n", + "1 562 2nd St, New York City, NY 10001 \n", + "2 277 Main St, New York City, NY 10001 \n", + "3 410 6th St, San Francisco, CA 94016 \n", + "4 43 Hill St, Atlanta, GA 30301 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_data = pd.DataFrame()\n", + "for file in files:\n", + " df = pd.read_csv('././SalesAnalysis/Sales_Data/' + file)\n", + " all_data = pd.concat([all_data, df])\n", + "\n", + "all_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### We just compressed each csv files' contents into a single csv and can now be used for analysis. You can verify that all contents were transferred by saving the new data frame ( in this case 'all_data') as a csv file using pd.to_csv('file_name') and then accessing it. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 3daba1cdda8004cab790530aea80003a14405991 Mon Sep 17 00:00:00 2001 From: Benjamin Parsons Date: Thu, 10 Sep 2020 09:06:45 -0600 Subject: [PATCH 6/8] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 60ae833..8c5b138 100644 --- a/README.md +++ b/README.md @@ -200,5 +200,5 @@ Sometimes you would need a functionality which is not directly provided by Keras 2. Another use case could be that you want to resize the images from a shape say 150x150 to a shape 224x224, which is generally utilized by the pretrained models, you can customize the ImageDataGenerator without coding your own data generator from ground up [(Example Notebook)](https://github.com/faizankshaikh/AV_Article_Codes/blob/master/Inception_From_Scratch/improvements/Inception_v1_from_Scratch.ipynb). -- ### [Data Science Hack #44 Merging All CSV File in a Directory into a Single CSV](./Code/Merging all csv files in a dir.ipynb) -Sometimes all your data is not bundled into a single csv. In order to perfrom analysis on your data, all data must be compressed into a single file to make analysis much easier. The link provides the steps regarding the merge. +- ### [Data Science Hack #45 Merging All CSV Files in a Directory into a Single CSV File Containing All Data](./Code/merge_csv_files.ipynb) +Sometime sour data isnt compiled into a single csv. 
To make analysis easier, transporting all data into one file is a must. Follow the link to see the simple procedure. From 263c573ab1f1c6e8b8afe8ec32cec1cfbdccb453 Mon Sep 17 00:00:00 2001 From: Benjamin Parsons Date: Thu, 10 Sep 2020 09:12:38 -0600 Subject: [PATCH 7/8] Delete merge_csv_files.ipynb --- merge_csv_files.ipynb | 218 ------------------------------------------ 1 file changed, 218 deletions(-) delete mode 100644 merge_csv_files.ipynb diff --git a/merge_csv_files.ipynb b/merge_csv_files.ipynb deleted file mode 100644 index eea537b..0000000 --- a/merge_csv_files.ipynb +++ /dev/null @@ -1,218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Merging all CSV Files in a Directory into Single CSV Containing All Data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### lets first see what csv files are in the directory.." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sales_December_2019.csv\n", - "Sales_April_2019.csv\n", - "Sales_February_2019.csv\n", - "Sales_March_2019.csv\n", - "Sales_August_2019.csv\n", - "Sales_May_2019.csv\n", - "Sales_November_2019.csv\n", - "Sales_October_2019.csv\n", - "Sales_January_2019.csv\n", - "Sales_September_2019.csv\n", - "Sales_July_2019.csv\n", - "Sales_June_2019.csv\n" - ] - } - ], - "source": [ - "files=[f for f in os.listdir(\"./SalesAnalysis/Sales_Data\") if f.endswith('.csv')]\n", - "all_data = pd.DataFrame()\n", - "\n", - "for file in files:\n", - " print(file)\n", - " #df = pd.read_csv('./csse_covid_19_daily_reports_us/' + file)\n", - " #all_data = pd.concat([all_data, df])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### ^ We just looked inside the directory and 
printed each file. Now, lets merge them into one single csv file containing all of our data.." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Order IDProductQuantity OrderedPrice EachOrder DatePurchase Address
0295665Macbook Pro Laptop1170012/30/19 00:01136 Church St, New York City, NY 10001
1295666LG Washing Machine1600.012/29/19 07:03562 2nd St, New York City, NY 10001
2295667USB-C Charging Cable111.9512/12/19 18:21277 Main St, New York City, NY 10001
329566827in FHD Monitor1149.9912/22/19 15:13410 6th St, San Francisco, CA 94016
4295669USB-C Charging Cable111.9512/18/19 12:3843 Hill St, Atlanta, GA 30301
\n", - "
" - ], - "text/plain": [ - " Order ID Product Quantity Ordered Price Each Order Date \\\n", - "0 295665 Macbook Pro Laptop 1 1700 12/30/19 00:01 \n", - "1 295666 LG Washing Machine 1 600.0 12/29/19 07:03 \n", - "2 295667 USB-C Charging Cable 1 11.95 12/12/19 18:21 \n", - "3 295668 27in FHD Monitor 1 149.99 12/22/19 15:13 \n", - "4 295669 USB-C Charging Cable 1 11.95 12/18/19 12:38 \n", - "\n", - " Purchase Address \n", - "0 136 Church St, New York City, NY 10001 \n", - "1 562 2nd St, New York City, NY 10001 \n", - "2 277 Main St, New York City, NY 10001 \n", - "3 410 6th St, San Francisco, CA 94016 \n", - "4 43 Hill St, Atlanta, GA 30301 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "all_data = pd.DataFrame()\n", - "for file in files:\n", - " df = pd.read_csv('././SalesAnalysis/Sales_Data/' + file)\n", - " all_data = pd.concat([all_data, df])\n", - "\n", - "all_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### We just compressed each csv files' contents into a single csv and can now be used for analysis. You can verify that all contents were transferred by saving the new data frame ( in this case 'all_data') as a csv file using pd.to_csv('file_name') and then accessing it. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From f0b66215cf71026cffc192fcaadd97b19e2c0069 Mon Sep 17 00:00:00 2001 From: Benjamin Parsons Date: Thu, 10 Sep 2020 09:13:33 -0600 Subject: [PATCH 8/8] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8c5b138..c5ebd0d 100644 --- a/README.md +++ b/README.md @@ -201,4 +201,4 @@ Sometimes you would need a functionality which is not directly provided by Keras 2. Another use case could be that you want to resize the images from a shape say 150x150 to a shape 224x224, which is generally utilized by the pretrained models, you can customize the ImageDataGenerator without coding your own data generator from ground up [(Example Notebook)](https://github.com/faizankshaikh/AV_Article_Codes/blob/master/Inception_From_Scratch/improvements/Inception_v1_from_Scratch.ipynb). - ### [Data Science Hack #45 Merging All CSV Files in a Directory into a Single CSV File Containing All Data](./Code/merge_csv_files.ipynb) -Sometime sour data isnt compiled into a single csv. To make analysis easier, transporting all data into one file is a must. Follow the link to see the simple procedure. +Sometimes our data isn't compiled into a single csv. To make analysis easier, transporting all data into one file is a must. Follow the link to see the simple procedure.