# Word count with Spark: run splitting/pairing functions as RDD transformations,
# then sum the per-word counts with a reduceByKey action and collect the result.
# Word count over an HDFS text file using Spark RDD transformations and actions.
# Assumes `sc` is a SparkContext provided by the Spark shell / driver context.

# Load the big text file from HDFS as an RDD of lines.
lines = sc.textFile("hdfs://namenode:9000/user/kalyan/bigtextfile.txt")


# Break one line into its whitespace-separated words.
def toWords(line):
    return line.split()


# flatMap (not map) because toWords returns multiple values per input line,
# and we want one flat RDD of words.
words = lines.flatMap(toWords)


# Convert a word into a (key, value) pair: the word itself keyed to a count of 1.
def toTuple(word):
    return (word, 1)


wordsTuple = words.map(toTuple)


# Combine the counts of identical keys. Named `add` rather than `sum` so the
# Python builtin sum() is not shadowed.
def add(x, y):
    return x + y


counts = wordsTuple.reduceByKey(add)

# Trigger the job and print the final (word, count) pairs on the driver.
print(counts.collect())