Skip to content
This repository was archived by the owner on Jul 20, 2022. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions aws-blog-dynamodb-analysis/LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Copyright 2017-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

Licensed under the Amazon Software License (the "License"). You may not use this file
except in compliance with the License. A copy of the License is located at

http://aws.amazon.com/asl/

or in the "license" file accompanying this file. This file is distributed on an "AS IS"
BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License
for the specific language governing permissions and limitations under the License.
2 changes: 2 additions & 0 deletions aws-blog-dynamodb-analysis/NOTICE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
aws-blog-dynamodb-analysis
Copyright 2017-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
49 changes: 49 additions & 0 deletions aws-blog-dynamodb-analysis/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
tweets-streaming.py
-------------------

This script pulls random tweets from the Twitter API and stores them in Amazon DynamoDB. There are two modules needed to execute the script:

- boto3: https://aws.amazon.com/sdk-for-python/
- twitter: https://pypi.python.org/pypi/twitter/

A Twitter account is needed to access the Twitter API. Go to https://www.twitter.com/ and sign up for a free account if you don't already have one. Once your account is set up, go to https://apps.twitter.com/ and, on the main landing page, click the grey "Create New App" button. After you give the app a name, open the "Keys and Access Tokens" tab to get your credentials for the Twitter API. You will need to generate a Consumer Key/Secret and an Access Token/Secret. All four values are used to authenticate your requests.

In the script, update the following lines with the real security credentials:

# Twitter security credentials
ACCESS_TOKEN = "...01234..."
ACCESS_SECRET = "...i7RkW..."
CONSUMER_KEY = "...be4Ma..."
CONSUMER_SECRET = "...btcar..."

This section can be customized according to your preference. Use your own table name and TTL value as desired:

# Global variables.
dynamodb_table = "TwitterAnalysis"
expires_after_days = 30



tweets-simulated.py
-------------------

This script generates simulated tweets and stores them in Amazon DynamoDB. There are three modules needed to execute the script:

- boto3: https://aws.amazon.com/sdk-for-python/
- names: https://pypi.python.org/pypi/names/
- loremipsum: https://pypi.python.org/pypi/loremipsum/

Unlike tweets-streaming.py, this script does not connect to the Twitter API, so the Twitter credential lines shown below are not part of it and do not need to be updated:

# Twitter security credentials
ACCESS_TOKEN = "...01234..."
ACCESS_SECRET = "...i7RkW..."
CONSUMER_KEY = "...be4Ma..."
CONSUMER_SECRET = "...btcar..."

This section can be customized according to your preference. Use your own table name and write rate (simulated tweets written per second) as desired:

# Global variables
dynamodb_table = "TwitterAnalysis"
provisioned_wcu = 1

75 changes: 75 additions & 0 deletions aws-blog-dynamodb-analysis/tweets-simulated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/python

# Copyright 2017-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Amazon Software License (the "License"). You may not use this file
# except in compliance with the License. A copy of the License is located at
#
# http://aws.amazon.com/asl/
#
# or in the "license" file accompanying this file. This file is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License
# for the specific language governing permissions and limitations under the License.

# Import modules
from loremipsum import get_sentences
import boto3
import names
import random
import string
import signal
import math
import time
import sys

# Global variables
# Name of the DynamoDB table that receives the simulated tweets.
dynamodb_table = "TwitterAnalysis"
# Pacing control: the generator loop sleeps 1.0/provisioned_wcu seconds after
# each tweet, so at most about this many items are written per second.
provisioned_wcu = 1

# Initiate DynamoDB client
# No region or credentials are passed explicitly here; boto3 is left to
# resolve them from its own configuration.
client = boto3.client('dynamodb')

# Signal handler, Ctrl+c to quit
def signal_handler(signal, frame):
    """Terminate the script cleanly when SIGINT (Ctrl+C) is received.

    Args:
        signal: Number of the delivered signal (unused). The name shadows the
            ``signal`` module inside this function only; it is kept so the
            handler's signature stays unchanged.
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    # print() with a single argument is valid on both Python 2 and Python 3.
    print("\n")
    sys.exit(0)


# Install the handler so Ctrl+C ends the script with status 0.
signal.signal(signal.SIGINT, signal_handler)

# Actions: switch the DynamoDB write and the console output on or off.
insert_to_ddb = True
print_to_screen = True

# Start the loop to generate simulated tweets. It has no exit condition of
# its own and runs until the process is interrupted.
while True:
    # Generate fake tweet
    user_id = names.get_first_name()
    # Random 17-digit id, kept as a string because the low-level DynamoDB API
    # takes numeric ('N') attribute values as strings.
    tweet_id = str(random.randint(10 ** 16, 10 ** 17 - 1))
    # Current UTC time in the same layout as Twitter's "created_at" field.
    created_at = time.strftime("%a %b %d %H:%M:%S +0000 %Y", time.gmtime())
    language = random.choice(['de', 'en', 'es', 'fr', 'id', 'nl', 'pt', 'sk'])
    text = str(get_sentences(1)[0])

    # Store tweet in DynamoDB
    if insert_to_ddb:
        client.put_item(
            TableName=dynamodb_table,
            Item={
                'user_id': {'S': user_id},
                'tweet_id': {'N': tweet_id},
                'created_at': {'S': created_at},
                'language': {'S': language},
                'text': {'S': text},
            })

    # Print output to screen. Single-argument print() calls are valid on both
    # Python 2 and Python 3.
    if print_to_screen:
        print("insert_to_ddb: %s" % insert_to_ddb)
        print("user_id : %s" % user_id)
        print("tweet_id : %s" % tweet_id)
        print("created_at : %s" % created_at)
        print("language : %s" % language)
        # Long texts are cut to 77 characters plus an ellipsis.
        print("text : %s" % (text[:77] + '...' if len(text) > 80 else text))
        print("\n===========================================")

    # Loop control: pace the writes to about provisioned_wcu items per second.
    time.sleep(1.0 / provisioned_wcu)
99 changes: 99 additions & 0 deletions aws-blog-dynamodb-analysis/tweets-streaming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/python

# Copyright 2017-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Amazon Software License (the "License"). You may not use this file
# except in compliance with the License. A copy of the License is located at
#
# http://aws.amazon.com/asl/
#
# or in the "license" file accompanying this file. This file is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License
# for the specific language governing permissions and limitations under the License.

# Import modules
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterStream
import boto3
import signal
import time
import sys

# Twitter security credentials
# Placeholder values: replace them with the four keys of your own Twitter app
# before running the script, and avoid committing real values.
ACCESS_TOKEN = "...01234..."
ACCESS_SECRET = "...i7RkW..."
CONSUMER_KEY = "...be4Ma..."
CONSUMER_SECRET = "...btcar..."

# Global variables.
# Name of the DynamoDB table that receives the tweets.
dynamodb_table = "TwitterAnalysis"
# Retention period, in days, intended for the TTL attribute written with each
# stored tweet.
expires_after_days = 30

# Authenticate and initialize stream
oauth = OAuth(ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
stream = TwitterStream(auth=oauth)
# Iterable of messages from the statuses/sample stream; the main loop of this
# script iterates over it.
tweets = stream.statuses.sample()

# Initiate DynamoDB client
# No region or credentials are passed explicitly here; boto3 is left to
# resolve them from its own configuration.
client = boto3.client('dynamodb')

# Signal handler, Ctrl+c to quit
def signal_handler(signal, frame):
    """Terminate the script cleanly when SIGINT (Ctrl+C) is received.

    Args:
        signal: Number of the delivered signal (unused). The name shadows the
            ``signal`` module inside this function only; it is kept so the
            handler's signature stays unchanged.
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    # print() with a single argument is valid on both Python 2 and Python 3.
    print("\n")
    sys.exit(0)


# Install the handler so Ctrl+C ends the script with status 0.
signal.signal(signal.SIGINT, signal_handler)

# Routing: switch the DynamoDB write and the console output on or off.
insert_to_ddb = True
print_to_screen = True

# Start the loop to get the tweets.
for tweet in tweets:
    # Get tweet data. Messages that do not carry all of these fields (for
    # example, stream notices that are not complete tweets) are skipped.
    try:
        user_id = tweet["user"]["screen_name"]
        tweet_id = tweet["id_str"]
        created_at = tweet["created_at"]
        timestamp_ms = tweet["timestamp_ms"]
        language = tweet["lang"]
        text = tweet["text"]
        hts = tweet["entities"]["hashtags"]
    except (KeyError, TypeError):
        continue

    try:
        # Expire items in the future. DynamoDB TTL attributes hold Unix epoch
        # time in seconds, so the millisecond timestamp is reduced to seconds
        # and the retention period is converted from days to seconds.
        ttl_value = str(int(timestamp_ms) // 1000 + expires_after_days * 86400)

        # Process hashtags. A DynamoDB string set may be neither empty nor
        # contain duplicates, so repeated tags are dropped (first occurrence
        # kept) and a tweet without hashtags gets the placeholder 'None'.
        # The tag text is stored as-is; wrapping it in str() would fail on
        # non-ASCII hashtags under Python 2.
        hashtags = []
        for ht in hts:
            tag = ht["text"]
            if tag not in hashtags:
                hashtags.append(tag)
        if not hashtags:
            hashtags = ['None']

        # Store tweet in DynamoDB
        if insert_to_ddb:
            client.put_item(
                TableName=dynamodb_table,
                Item={
                    'user_id': {'S': user_id},
                    'tweet_id': {'N': tweet_id},
                    'created_at': {'S': created_at},
                    'ttl_value': {'N': ttl_value},
                    'language': {'S': language},
                    'text': {'S': text},
                    'hashtags': {'SS': hashtags},
                })

        # Print output to screen. Single-argument print() calls are valid on
        # both Python 2 and Python 3.
        if print_to_screen:
            print("insert_to_ddb: %s" % insert_to_ddb)
            print("user_id : %s" % user_id)
            print("tweet_id : %s" % tweet_id)
            print("created_at : %s" % created_at)
            print("timestamp_ms : %s" % timestamp_ms)
            print("language : %s" % language)
            # Long texts are cut to 77 characters plus an ellipsis.
            print("text : %s" % (text[:77] + '...' if len(text) > 80 else text))
            print("hashtags : %s" % hashtags)
            print("\n===========================================")

    except Exception as exc:
        # Best effort: one failing tweet must not stop the stream, but the
        # failure is reported instead of being silently discarded.
        sys.stderr.write("tweet %s skipped: %r\n" % (tweet_id, exc))