diff --git a/aws-blog-dynamodb-analysis/LICENSE.txt b/aws-blog-dynamodb-analysis/LICENSE.txt new file mode 100644 index 00000000..d31389f0 --- /dev/null +++ b/aws-blog-dynamodb-analysis/LICENSE.txt @@ -0,0 +1,10 @@ +Copyright 2017-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. + +Licensed under the Amazon Software License (the "License"). You may not use this file +except in compliance with the License. A copy of the License is located at + + http://aws.amazon.com/asl/ + +or in the "license" file accompanying this file. This file is distributed on an "AS IS" +BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License +for the specific language governing permissions and limitations under the License. diff --git a/aws-blog-dynamodb-analysis/NOTICE.txt b/aws-blog-dynamodb-analysis/NOTICE.txt new file mode 100644 index 00000000..056b8c74 --- /dev/null +++ b/aws-blog-dynamodb-analysis/NOTICE.txt @@ -0,0 +1,2 @@ +aws-blog-dynamodb-analysis +Copyright 2017-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. diff --git a/aws-blog-dynamodb-analysis/README.md b/aws-blog-dynamodb-analysis/README.md new file mode 100644 index 00000000..898e152b --- /dev/null +++ b/aws-blog-dynamodb-analysis/README.md @@ -0,0 +1,49 @@ +tweets-streaming.py +------------------- + +This script pulls random tweets from the Twitter API and stores them in Amazon DynamoDB. There are two modules needed to execute the script: + +- boto3: https://aws.amazon.com/sdk-for-python/ +- twitter: https://pypi.python.org/pypi/twitter/ + +A Twitter account is needed to access Twitter API. Go to https://www.twitter.com/ and sign up for a free account, if you don't already have one. Once your account is up, go to https://apps.twitter.com/ and on the main landing page, click the grey "Create New App" button. After you give it a name, you can go to the "Keys and Access Tokens" to get your credentials to use the Twitter API. 
You will need to generate Consumer Key/Secret and Access Token/Secret. All four values are used to authenticate your requests. + +In the script, update the following lines with the real security credentials: + + # Twitter security credentials + ACCESS_TOKEN = "...01234..." + ACCESS_SECRET = "...i7RkW..." + CONSUMER_KEY = "...be4Ma..." + CONSUMER_SECRET = "...btcar..." + +This section can be customized according to your preference. Use your own table name and TTL value as desired: + + # Global variables. + dynamodb_table = "TwitterAnalysis" + expires_after_days = 30 + + + +tweets-simulated.py +------------------- + +This script generates simulated tweets and stores them in Amazon DynamoDB. There are three modules needed to execute the script: + +- boto3: https://aws.amazon.com/sdk-for-python/ +- names: https://pypi.python.org/pypi/names/ +- loremipsum: https://pypi.python.org/pypi/loremipsum/ + +This script does not call the Twitter API, so the Twitter security credentials below (used by tweets-streaming.py) are not needed here: + + # Twitter security credentials + ACCESS_TOKEN = "...01234..." + ACCESS_SECRET = "...i7RkW..." + CONSUMER_KEY = "...be4Ma..." + CONSUMER_SECRET = "...btcar..." + +This section can be customized according to your preference. Use your own table name and write rate (`provisioned_wcu`, roughly the number of simulated tweets written per second) as desired: + + # Global variables + dynamodb_table = "TwitterAnalysis" + provisioned_wcu = 1 + diff --git a/aws-blog-dynamodb-analysis/tweets-simulated.py b/aws-blog-dynamodb-analysis/tweets-simulated.py new file mode 100644 index 00000000..c5db9fec --- /dev/null +++ b/aws-blog-dynamodb-analysis/tweets-simulated.py @@ -0,0 +1,75 @@ +#!/usr/bin/python + +# Copyright 2017-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use this file +# except in compliance with the License. A copy of the License is located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. 
This file is distributed on an "AS IS" +# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License +# for the specific language governing permissions and limitations under the License. + +# Import modules +from loremipsum import get_sentences +import boto3 +import names +import random +import string +import signal +import math +import time +import sys + +# Global variables: target DynamoDB table name, and the write pace (the loop sleeps 1.0/provisioned_wcu seconds per tweet) +dynamodb_table = "TwitterAnalysis" +provisioned_wcu = 1 + +# Initiate DynamoDB client +client = boto3.client('dynamodb') + +# Signal handler: on SIGINT (Ctrl+c) print a blank line and exit with status 0 +def signal_handler(signal, frame): + print "\n" + sys.exit(0) + +signal.signal(signal.SIGINT, signal_handler) + +# Actions: flags that turn the DynamoDB write and the screen output on or off +insert_to_ddb = True; +print_to_screen = True; + +# Start the loop to generate simulated tweets (runs until interrupted) +while(True) : + # Generate fake tweet: first name from the names module, random 17-digit id, current UTC time, random language code, one loremipsum sentence + user_id = names.get_first_name() + tweet_id = str(random.randint(pow(10,16),pow(10,17)-1)) + created_at = time.strftime("%a %b %d %H:%M:%S +0000 %Y", time.gmtime()) + language = random.choice(['de', 'en', 'es', 'fr', 'id', 'nl', 'pt', 'sk']) + text = str(get_sentences(1)[0]) + + # Store tweet in DynamoDB (tweet_id is sent as a number, every other attribute as a string) + if insert_to_ddb == True : + res = client.put_item( + TableName=dynamodb_table, + Item={ + 'user_id' : { 'S' : user_id }, + 'tweet_id' : { 'N' : tweet_id }, + 'created_at': { 'S' : created_at }, + 'language' : { 'S' : language }, + 'text' : { 'S' : text } + }) + + # Print output to screen (text longer than 80 characters is cut to its first 77 characters plus an ellipsis) + if print_to_screen == True : + print "insert_to_ddb: %s" % insert_to_ddb + print "user_id : %s" % user_id + print "tweet_id : %s" % tweet_id + print "created_at : %s" % created_at + print "language : %s" % language + print "text : %s" % (text[:77] + '...' 
if len(text) > 80 else text) + print "\n===========================================" + + # Loop control: pause 1.0/provisioned_wcu seconds before generating the next tweet + time.sleep(1.0/provisioned_wcu) diff --git a/aws-blog-dynamodb-analysis/tweets-streaming.py b/aws-blog-dynamodb-analysis/tweets-streaming.py new file mode 100644 index 00000000..877ff6c8 --- /dev/null +++ b/aws-blog-dynamodb-analysis/tweets-streaming.py @@ -0,0 +1,99 @@ +#!/usr/bin/python + +# Copyright 2017-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Amazon Software License (the "License"). You may not use this file +# except in compliance with the License. A copy of the License is located at +# +# http://aws.amazon.com/asl/ +# +# or in the "license" file accompanying this file. This file is distributed on an "AS IS" +# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License +# for the specific language governing permissions and limitations under the License. + +# Import modules +from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream +import boto3 +import signal +import time +import sys + +# Twitter security credentials +ACCESS_TOKEN = "...01234..." +ACCESS_SECRET = "...i7RkW..." +CONSUMER_KEY = "...be4Ma..." +CONSUMER_SECRET = "...btcar..." + +# Global variables: target DynamoDB table name and the item lifetime in days for the TTL attribute. +dynamodb_table = "TwitterAnalysis" +expires_after_days = 30 + +# Authenticate and initialize stream (tweets is the iterable returned by statuses.sample(), consumed by the loop below) +oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET) +stream = TwitterStream(auth=oauth) +tweets = stream.statuses.sample() + +# Initiate DynamoDB client +client = boto3.client('dynamodb') + +# Signal handler: on SIGINT (Ctrl+c) print a blank line and exit with status 0 +def signal_handler(signal, frame): + print "\n" + sys.exit(0) + +signal.signal(signal.SIGINT, signal_handler) + +# Routing: flags that turn the DynamoDB write and the screen output on or off. Also for easy block commenting. +insert_to_ddb = True; +print_to_screen = True; + +# Start the loop to get the tweets. 
def _ttl_epoch_seconds(timestamp_ms, days):
    """Return the expiry time, in epoch seconds, `days` after `timestamp_ms`.

    `timestamp_ms` is the tweet's millisecond timestamp (string or int).
    DynamoDB TTL compares against epoch time in seconds, so both terms are
    expressed in seconds before they are added.
    """
    return int(timestamp_ms) // 1000 + days * 86400


def _hashtag_values(entries):
    """Return the unique hashtag texts in first-seen order.

    `entries` is the list of hashtag entities of one tweet. A DynamoDB
    string set may be neither empty nor contain duplicates, so duplicates
    are dropped and the placeholder ['None'] stands in for "no hashtags".
    """
    unique = []
    for entry in entries:
        tag = entry["text"]
        if tag not in unique:
            unique.append(tag)
    return unique or ['None']


for tweet in tweets:
    # Get tweet data. The stream may also deliver messages that are not
    # tweets; anything missing one of these fields is skipped quietly.
    try:
        user_id = tweet["user"]["screen_name"]
        tweet_id = tweet["id_str"]
        created_at = tweet["created_at"]
        timestamp_ms = tweet["timestamp_ms"]
        language = tweet["lang"]
        text = tweet["text"]
        hts = tweet["entities"]["hashtags"]
    except (KeyError, TypeError):
        continue

    try:
        # Expire items in the future, calculated in epoch seconds
        ttl_value = str(_ttl_epoch_seconds(timestamp_ms, expires_after_days))

        # Process hashtags (kept as unicode so non-ASCII tags survive)
        hashtags = _hashtag_values(hts)

        # Store tweet in DynamoDB
        if insert_to_ddb:
            client.put_item(
                TableName=dynamodb_table,
                Item={
                    'user_id': {'S': user_id},
                    'tweet_id': {'N': tweet_id},
                    'created_at': {'S': created_at},
                    'ttl_value': {'N': ttl_value},
                    'language': {'S': language},
                    'text': {'S': text},
                    'hashtags': {'SS': hashtags},
                })

        # Print output to screen
        if print_to_screen:
            print("insert_to_ddb: %s" % insert_to_ddb)
            print("user_id : %s" % user_id)
            print("tweet_id : %s" % tweet_id)
            print("created_at : %s" % created_at)
            print("timestamp_ms : %s" % timestamp_ms)
            print("language : %s" % language)
            print("text : %s" % (text[:77] + '...' if len(text) > 80 else text))
            print("hashtags : %s" % hashtags)
            print("\n===========================================")

    except Exception as err:
        # Keep streaming on a per-tweet failure, but report it instead of
        # hiding it: a silent pass here is what masked the misspelled
        # expires_after_days name and dropped every tweet.
        sys.stderr.write("Skipping tweet %s: %r\n" % (tweet_id, err))