Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
a162bfa
updating required packages
rachelannelise Dec 18, 2020
58c13dd
upgrading requirements
rachelannelise Dec 18, 2020
f993218
reorg for package consistency
rachelannelise Dec 18, 2020
ffaff7f
pinning requirements
rachelannelise Dec 18, 2020
d769990
pinning requirements
rachelannelise Dec 18, 2020
f44ffa9
pinning requirements
rachelannelise Dec 18, 2020
bfe0a5a
adding file pathlib for yaml merge
rachelannelise Dec 18, 2020
94783d2
updating relative path location
rachelannelise Dec 18, 2020
ac2c964
updating relative path location
rachelannelise Dec 18, 2020
a87ee8b
updating relative path location
rachelannelise Dec 18, 2020
5125b61
include dbt_project file yaml
hbbtstar Dec 19, 2020
9848311
change merge_settings to use pkg_resources
hbbtstar Dec 19, 2020
abe15ed
adding dbt to package data
rachelannelise Dec 21, 2020
a1f863d
adding more pkg resources
rachelannelise Dec 21, 2020
9191cb4
adding more pkg resources
rachelannelise Dec 21, 2020
b332505
adding more pkg resources
rachelannelise Dec 21, 2020
f2a4c20
adding profile to args
rachelannelise Dec 21, 2020
0f02e8f
adding all files in dbt to manifest
rachelannelise Dec 21, 2020
fa74fac
adding models to manifest
rachelannelise Dec 21, 2020
d39966b
adding dbt models to manifest
rachelannelise Dec 21, 2020
f0107f6
adding dbt models to manifest
rachelannelise Dec 21, 2020
5567c4d
adding init to require install
rachelannelise Dec 21, 2020
6db0be0
adding init to require install
rachelannelise Dec 21, 2020
756386f
adding init to require install
rachelannelise Dec 21, 2020
f8ccc93
removing init and adding package data
rachelannelise Dec 21, 2020
aa70df4
adding all package data files
rachelannelise Dec 21, 2020
d2c7318
adding all package data files
rachelannelise Dec 21, 2020
0562b97
adding all package data files
rachelannelise Dec 21, 2020
4c2622a
adding all package data files
rachelannelise Dec 21, 2020
5fbccef
trying manifest
rachelannelise Dec 21, 2020
ae30517
trying manifest
rachelannelise Dec 21, 2020
b645eef
trying manifest
rachelannelise Dec 21, 2020
6d4f0da
trying manifest
rachelannelise Dec 21, 2020
c3513b2
removing package_data
rachelannelise Dec 21, 2020
f406e49
changing dbt requirements
rachelannelise Jan 11, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
include *.txt
include parsely_raw_data/*.thrift
include parsely_raw_data/dbt/redshift/*
recursive-include parsely_raw_data *.sql
1 change: 0 additions & 1 deletion dbt/__init__.py

This file was deleted.

3 changes: 0 additions & 3 deletions dbt/redshift/__init__.py

This file was deleted.

4 changes: 0 additions & 4 deletions dbt/requirements.txt

This file was deleted.

5 changes: 3 additions & 2 deletions parsely_raw_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
limitations under the License.
"""

__version__ = "2.4.0"
__version__ = "2.4.1"

from . import bigquery, docgen, redshift, s3, samples, schema, stream, utils
from . import bigquery, docgen, redshift, s3, samples, schema, stream, utils, dbt

__all__ = [
"bigquery",
Expand All @@ -27,6 +27,7 @@
"schema",
"stream",
"utils",
"dbt"
]

BOOLEAN_FIELDS = {"flags_is_amp"}
Expand Down
7 changes: 7 additions & 0 deletions parsely_raw_data/dbt/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from __future__ import absolute_import

from . import redshift as parsely_dbt_redshift

__all__ = [
"parsely_dbt_redshift"
]
File renamed without changes.
20 changes: 20 additions & 0 deletions parsely_raw_data/dbt/redshift/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import absolute_import

from .redshift_etl import migrate_from_s3_by_day
from .settings import (
DBT_PROFILE_LOCATION,
DBT_PROFILE_TARGET_NAME,
ETL_END_DATE,
ETL_KEEP_RAW_DATA,
ETL_START_DATE,
PARSELY_RAW_DATA_TABLE,
REDSHIFT_DATABASE,
REDSHIFT_HOST,
REDSHIFT_PASSWORD,
REDSHIFT_PORT,
REDSHIFT_USER,
S3_AWS_ACCESS_KEY_ID,
S3_AWS_SECRET_ACCESS_KEY,
S3_NETWORK_NAME,
)
from .settings import migrate_settings
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
- "{% if var('etl:keep_rawdata') == true %} select 1 {% else %} truncate table {{\
\ target.schema }}.parsely_rawdata {% endif %}"
- "truncate table {{var('parsely:events')}}"
"profile": "parsely-dwh"
"profile": "parsely_dwh"
"source-paths":
- "models"
"target-path": "target"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from __future__ import absolute_import
import logging
import os
import pkg_resources
import psycopg2
import subprocess
from dateutil import rrule

from parsely_raw_data import redshift as parsely_redshift
from parsely_raw_data import utils as parsely_utils
from dbt.redshift.settings.default import (
from parsely_raw_data.dbt.redshift.settings import (
DBT_PROFILE_LOCATION,
DBT_PROFILE_TARGET_NAME,
ETL_END_DATE,
Expand All @@ -23,7 +23,7 @@
S3_AWS_SECRET_ACCESS_KEY,
S3_NETWORK_NAME,
)
from dbt.redshift.settings.merge_settings_yaml import migrate_settings
from parsely_raw_data.dbt.redshift.settings.merge_settings_yaml import migrate_settings

SETTINGS_ARG_MAPPING = {
'table_name': PARSELY_RAW_DATA_TABLE,
Expand Down Expand Up @@ -82,9 +82,10 @@ def migrate_from_s3_by_day(network=S3_NETWORK_NAME,
secret_access_key=secret_access_key)

# This runs dbt once all of the new data has been copied into the raw data table
dpl_wd = os.path.join(os.getcwd(), 'dbt/redshift/')
logging.info(f'Running the dbt script located at: {dpl_wd}/run_parsely_dpl.sh')
subprocess.call(dpl_wd + "run_parsely_dpl.sh " + dbt_profiles_dir + ' ' + dbt_target, shell=True, cwd=dpl_wd)
dbt_etl_script_loc = pkg_resources.resource_filename("parsely_raw_data", "dbt/redshift/run_parsely_dpl.sh")
dbt_etl_cwd = pkg_resources.resource_filename("parsely_raw_data", "dbt/redshift/")
logging.info(f'Running the dbt script located at: {dbt_etl_script_loc}')
subprocess.call(dbt_etl_script_loc + ' ' + dbt_profiles_dir + ' ' + dbt_target, shell=True, cwd=dbt_etl_cwd)


def main():
Expand All @@ -95,14 +96,16 @@ def main():
help='The last day to process data from S3 to Redshift in the format YYYY-MM-DD')
parser.add_argument('--dbt_profiles_dir', required=False, default=DBT_PROFILE_LOCATION,
help='The location from root that contains the .dbt/profiles.yml file, example: /home/user/.dbt/')
parser.add_argument('--dbt_profile', required=False, default='parsely_dwh',
help='The name of the dbt profile located in the local /.dbt/profiles.yml file')
parser.add_argument('--dbt_target', required=False, default=DBT_PROFILE_TARGET_NAME,
help='The target ie. dev, prod, or test to use within the dbt profiles.yml file.')
parser.add_argument('--create-table', action='store_true', default=True,
help='Optional: create the Redshift Parse.ly rawdata table because it does not yet exist.')
args = parser.parse_args()

# Copy the current settings into the packaged dbt_project.yml before dbt runs:
settings_migration = migrate_settings()
settings_migration = migrate_settings(profile=args.dbt_profile, table=args.table_name)
if not settings_migration:
logging.warning("Settings not copied to dbt_profiles.yml successfully.")
raise Exception("Settings not copied to dbt_profiles.yml successfully. Please edit default.py or copy the"
Expand Down
3 changes: 3 additions & 0 deletions parsely_raw_data/dbt/redshift/settings/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .default import *
from .merge_settings_yaml import migrate_settings

Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import yaml
from dbt.redshift.settings.default import *
import pkg_resources
from pathlib import Path

from .default import *

SETTINGS_VAR_MAPPING = [
{'location': 'profile', 'settings': DBT_PROFILE_NAME},
{'location': 'parsely:events', 'settings': PARSELY_RAW_DATA_TABLE},
{'location': 'parsely:timezone', 'settings': ETL_TIME_ZONE},
{'location': 'parsely:actions', 'settings': ETL_PARSELY_ACTIONS},
{'location': 'etl:keep_rawdata', 'settings': ETL_KEEP_RAW_DATA},
Expand All @@ -19,8 +20,13 @@
]


def migrate_settings():
with open(r'dbt/redshift/dbt_project.yml') as file:
def migrate_settings(profile=DBT_PROFILE_NAME, table=PARSELY_RAW_DATA_TABLE):
# dbt_project.yml is installed as package data, so resolve its path with pkg_resources instead of relying on the current working directory
filepath = pkg_resources.resource_filename("parsely_raw_data", "dbt/redshift/dbt_project.yml")
SETTINGS_VAR_MAPPING.append({'location': 'profile', 'settings': profile})
SETTINGS_VAR_MAPPING.append({'location': 'parsely:events', 'settings': table})

with open(filepath) as file:
dbt_profile = yaml.load(file, Loader=yaml.FullLoader)

for row in SETTINGS_VAR_MAPPING:
Expand All @@ -31,7 +37,7 @@ def migrate_settings():
dbt_profile['vars'][row['location']] = str(row['settings'])
continue

with open(r'dbt/redshift/dbt_project.yml', 'w') as file:
with open(filepath, 'w') as file:
yaml.dump(dbt_profile, file, default_style='"')
stored_successfully = True

Expand Down
23 changes: 13 additions & 10 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
boto3>=1.4.4,<1.17
botocore>=1.5.0,<1.20
google-api-core>=1.16.0,<1.24.0
protobuf>=3.6.0,<3.15
google-api-python-client
dbt>=0.15.0,<0.18
google-api-core<1.17.0,>=1.16.0
protobuf<3.12,>=3.6.0
google-api-python-client==1.8.0
oauth2client==4.1.3
psycopg2cffi-compat
six
tablib
xlsxwriter
tabulate
oauth2client
pytest
pyyaml<=5.1
pytest==6.2.1
python-dateutil==2.8.1
pyyaml==5.3.1
six==1.15.0
sqlalchemy==1.3.20
tablib==3.0.0
tabulate==0.8.7
xlsxwriter==1.3.7
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def run_setup():
'parsely_s3 = parsely_raw_data.s3:main',
'parsely_stream = parsely_raw_data.stream:main',
'parsely_schema = parsely_raw_data.docgen:main',
'parsely_redshift_etl = dbt.redshift.redshift_etl:main'
'parsely_redshift_etl = parsely_raw_data.dbt.redshift.redshift_etl:main'
]
},
install_requires=install_requires,
Expand Down