Reconstructed unified diff for mapper.py. The stored copy of this patch had
every newline collapsed to a single space, so it could not be applied.

Review notes:
  * Each input line is lower-cased before filtering, so the stop word 'I'
    could never match; it is spelled 'i' in this version. The post-image
    blob id on the "index" line still refers to the patch as first generated.
  * The print statement is written as an explicit -/+ pair so that it sits
    inside the new "if". The collapsed copy listed it as unchanged context,
    which leaves the "if" without a body unless the original print was
    already indented deeper or whitespace-only changes were hidden.
  * Blank context lines and indentation widths were lost in the collapse and
    are assumed here (4-space indent, three blank context lines to satisfy
    the -5,10 +5,13 counts). Verify against mapper.py at 9fa3def first.
  * stopwords is rebuilt for every input line; hoisting it above the
    "for line in sys.stdin:" loop requires lines outside this hunk.
  * '.', ',' and '!' only match when they appear as standalone tokens,
    because words are split on whitespace only.

diff --git a/mapper.py b/mapper.py
index 9fa3def..0baf058 100644
--- a/mapper.py
+++ b/mapper.py
@@ -5,10 +5,13 @@ for line in sys.stdin:
     # remove leading and trailing whitespace
     line = line.strip()
+    line = line.lower()
 
     # split the line into words; splits on any whitespace
     words = line.split()
 
+    stopwords = set(['the','and','a','i','.',',','!','or','for','not','in','on','to','an','be','but','of','is','it'])
     # output tuples (word, 1) in tab-delimited format
     for word in words:
-        print '%s\t%s' % (word, "1")
+        if word not in stopwords:
+            print '%s\t%s' % (word, "1")
 