
val data = List(
  "State->TN~City->Chennai",
  "State->UP~City->Lucknow"
)

val lis = List("A~B", "C~D", "E~F")

println
println("=====Raw List=====")
println
println(lis)
lis.foreach(println)

// flatMap splits each element on "~" and flattens the pieces into a single list
val split = lis.flatMap(x => x.split("~"))

println
println("=====split List=====")
println
println(split)
split.foreach(println)
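For comparison, a quick sketch (an illustration, not part of the exercise above) of how map differs from flatMap on the same list:

// mapped is a List of 3 Arrays: Array(A, B), Array(C, D), Array(E, F)
val mapped = lis.map(x => x.split("~"))

// flattened is a single List of 6 elements: List(A, B, C, D, E, F)
val flattened = lis.flatMap(x => x.split("~"))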

package pack

object obj {

  def main(args: Array[String]): Unit = {

    println("=====Started=====")

    val data = List(
      "State->TN~City->Chennai",
      "State->UP~City->Lucknow"
    )

    println
    println("=====Raw Data=====")
    println
    data.foreach(println)

    // Split each record on "~" and flatten into individual key->value strings
    val flat = data.flatMap(x => x.split("~"))

    println
    println("=====Splitted Data=====")
    println
    flat.foreach(println)

    // Keep only the State entries
    val statefil = flat.filter(x => x.contains("State"))

    println
    println("=====State Data=====")
    println
    statefil.foreach(println)

    // Keep only the City entries
    val cityfil = flat.filter(x => x.contains("City"))

    println
    println("=====City Data=====")
    println
    cityfil.foreach(println)

    // Strip the "State->" prefix to leave just the state name
    val Statefinal = statefil.map(x => x.replace("State->", ""))

    println
    println("=====Final State Data=====")
    println
    Statefinal.foreach(println)

    // Strip the "City->" prefix to leave just the city name
    val Cityfinal = cityfil.map(x => x.replace("City->", ""))

    println
    println("=====Final City Data=====")
    println
    Cityfinal.foreach(println)
  }
}
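One possible follow-up (a sketch, not part of the program above): the cleaned state and city lists line up by index, so they can be paired back together with zip inside main after both lists are built.

// Pairs each state with its city by position, printing:
// (TN,Chennai)
// (UP,Lucknow)
val pairs = Statefinal.zip(Cityfinal)
pairs.foreach(println)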

"file:///F:/Practice/Data/datatxns"

============Reading USDATA ==================

package pack

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object obj {

  def main(args: Array[String]): Unit = {

    println("===started===")
    println

    val conf = new SparkConf()
      .setAppName("wcfinal")
      .setMaster("local[*]")
      .set("spark.driver.host", "localhost")
      .set("spark.driver.allowMultipleContexts", "true")

    val sc = new SparkContext(conf) // RDD

    sc.setLogLevel("ERROR")

    val spark = SparkSession.builder().config(conf).getOrCreate() // Dataframe

    // Read the raw text file into an RDD with a single partition
    val data = sc.textFile("file:///D:/data/usdata", 1)

    data.take(10).foreach(println)

    // Keep only the lines longer than 200 characters
    val lendata = data.filter(x => x.length > 200)

    println
    println("===len > 200===")
    println
    lendata.foreach(println)

    // Split each line on "," and flatten into individual fields
    val flatdata = lendata.flatMap(x => x.split(","))

    println
    println("===flatdata===")
    println
    flatdata.foreach(println)

    // Remove "-" characters from every field
    val repdata = flatdata.map(x => x.replace("-", ""))

    println
    println("===repdata===")
    println
    repdata.foreach(println)

    // Append ",zeyo" to every field
    val condata = repdata.map(x => x + ",zeyo")

    println
    println("===condata===")
    println
    condata.foreach(println)
  }
}
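If the transformed data needs to be kept, an RDD can be written back out with saveAsTextFile. A minimal sketch that would sit inside main after condata is built; the output path here is only an assumed example:

// Writes one part file per partition under the given directory (assumed path)
condata.saveAsTextFile("file:///D:/data/usdata_out")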

==================Column based filter / Schema RDD filter========================

package pack

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object obj {

  // Case class describing the three columns of each record
  case class columns(id: String, category: String, product: String)

  def main(args: Array[String]): Unit = {

    println("===started===")
    println

    val conf = new SparkConf().setAppName("wcfinal").setMaster("local[*]")

    val sc = new SparkContext(conf) // RDD

    sc.setLogLevel("ERROR")

    val spark = SparkSession.builder().config(conf).getOrCreate() // Dataframe

    val data = sc.textFile("file:///F:/Practice/Data/datatxns")

    data.foreach(println)

    // Split each line on "," into an array of fields
    val split = data.map(x => x.split(","))

    // Map each array onto the case class to get a schema RDD
    val schemardd = split.map(x => columns(x(0), x(1), x(2)))

    // Keep only the rows whose product column contains "Gymnastics"
    val filrdd = schemardd.filter(x => x.product.contains("Gymnastics"))

    println
    filrdd.foreach(println)
  }
}
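Since a SparkSession is already created, the case-class RDD can also be turned into a DataFrame. A minimal sketch, assuming it is placed inside main after schemardd is built:

// Needed for the rdd-to-DataFrame conversion
import spark.implicits._

// Column names come from the case class fields: id, category, product
val df = schemardd.toDF()
df.show()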
