from pyspark.sql.functions import split, explode, col

# Read the text file; each line of the file becomes one row in a column named "line"
df = spark.read.text("/FileStore/tables/words-1.txt").toDF("line")

# Split each line on spaces, explode into one word per row,
# drop empty strings, and count occurrences of each word
words = (
    df.select(explode(split(col("line"), " ")).alias("words"))
      .filter(col("words") != "")
      .groupBy("words")
      .count()
)

words.show()
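
# A common follow-up (not part of the original snippet) is to show the most
# frequent words first. A minimal sketch, assuming the `words` DataFrame
# computed above:
words.orderBy(col("count").desc()).show(20, truncate=False)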