0% found this document useful (0 votes)
26 views

ALL Podcast Queries-Ajeet

The document contains various SQL queries related to analyzing music and podcast consumption data from Gaana. Some of the queries summarize playback logs to calculate metrics like total users, plays and duration by date. Other queries join playback data with metadata tables to analyze consumption by show, episode, language, app version and user demographics.

Uploaded by

Apple
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
26 views

ALL Podcast Queries-Ajeet

The document contains various SQL queries related to analyzing music and podcast consumption data from Gaana. Some of the queries summarize playback logs to calculate metrics like total users, plays and duration by date. Other queries join playback data with metadata tables to analyze consumption by show, episode, language, app version and user demographics.

Uploaded by

Apple
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 44

Tables:

`music_analytics_archived.ma_track_play_log_detailed_*` p
Gaana-bigquery-1315.music_analytics.ma_podcast_data
Gaana-bigquery-1315:music_analytics.tm_offline_trackslog_18082018
Music.
Tm_podcast_seasons (mapping table between shows and episodes)
Tm_track (individual tracks and podcasts (sapid==1))
Tm_podcasts_show (all show data)
Tm_vendor (list of all unique vendors)
Tm_artist (list of all vendors' artists)
Podcast_show_tags
Tm_tag_val_master (Type I and Type II)
Tm_user_details (logged in Users info)
App cast ids (Only spoken audio pieces)
Podcast_L30
gaana-bigquery-1315:acoe_gaana.Cohort_L30

-- Follower count per podcast entity for non-logged-in (device-level) follows.
-- NOTE(review): entity_status = '2' and sync_status = '0' presumably select
-- active, not-yet-synced follows; confirm against the table definition.
SELECT
    entity_id,
    count(*) AS podcast_followers,
    'non-loggedin' AS type
FROM `gaana_favourite.tm_device_podcast_*`
WHERE entity_status = '2'
    AND sync_status = '0'
-- entity_id is selected next to an aggregate, so it has to be grouped.
GROUP BY entity_id;

-- Follower count per podcast entity for logged-in (user-level) follows.
-- NOTE(review): entity_status = '2' presumably marks an active follow; confirm.
SELECT
    entity_id,
    count(*) AS podcast_followers,
    'loggedin' AS type
FROM `gaana_favourite.tm_user_podcast_*`
WHERE entity_status = '2'
-- entity_id is selected next to an aggregate, so it has to be grouped.
GROUP BY entity_id;
Master query:
-- Master query: monthly podcast consumption (distinct devices, plays, minutes).
-- Play logs are first collapsed to one row per (month, device, track) and then
-- inner-joined to the episode -> show mapping, so only tracks that are mapped
-- podcast episodes contribute to the totals.
select
    month,
    count(distinct device_id) as total_users,
    sum(plays) as total_plays,
    round((sum(duration) / 60), 2) as total_duration_mins
from (
    select
        format_date('%Y%m', cast(timestamp(time_played_on) as date)) as month,
        device_id,
        track_id,
        sum(cast(duration as float64)) as duration,
        count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    -- select date range as required
    where (_table_suffix between format_date('%Y%m%d', date('2021-06-01'))
                             and format_date('%Y%m%d', date('2021-06-01')))
        -- Optional filters, disabled by default:
        -- and cast(duration as float64) > 30  -- to take only those plays where duration > 30s
        -- and track_id in (select id from `music.tm_track` where lower(sapid) in ('podcast'))  -- to take all podcast tracks ONLY
    group by 1, 2, 3
) a
join (
    -- Episode -> show lookup with show-level attributes.
    select
        j.show_id as show_id,
        k.title as show_title,
        language,
        categories,
        release_date,
        j.tid as episode_id,
        i.title as episode_name
    from (
        select id, title
        from `music.tm_track`
        where lower(sapid) in ('podcast')
    ) i
    join (
        select distinct show_id, safe_cast(track_id as int64) as tid, season
        from `music.tm_podcast_seasons`
    ) j
        on i.id = j.tid
    left join (
        select
            id,
            title,
            trim(lower(language)) as language,
            categories,
            date(timestamp(created_on)) as release_date
        from `music.tm_podcasts_show`
    ) k
        on safe_cast(j.show_id as int64) = k.id
) b
    on safe_cast(a.track_id as int64) = b.episode_id
-- where show_id in ("62531")
group by 1
App version category wise plays:
-- Daily podcast consumption split by platform bucket and app version.
select
    -- categories,
    platform,
    gaana_app_version,
    date,
    -- client_country_code,
    -- count(client_country_code) as no_of_plays_in_each_country,
    count(distinct device_id) as total_users,
    sum(plays) as total_plays,
    round((sum(duration) / 60), 2) as total_duration_mins
from (
    select
        -- Collapse numeric platform codes into reporting buckets.
        case
            when platform in (5, 87, 112) then 'AOS'
            when platform in (3) then 'iOS'
            when platform in (1, 21) then 'WEB'
            when platform in (90, 92) then 'WAP'
            when platform in (131) then 'MiniApp'
            else 'Others'
        end as platform,
        cast(timestamp(time_played_on) as date) as date,
        device_id,
        track_id,
        gaana_app_version,
        -- client_country_code,
        sum(duration) as duration,
        count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    -- select date range as required
    where (_table_suffix between format_date('%Y%m%d', date('2021-06-30'))
                             and format_date('%Y%m%d', date('2021-06-30')))
        -- and duration > 30  -- to take only those plays where duration > 30s
        and safe_cast(track_id as int64) in (
            select id
            from `music.tm_track`
            where lower(sapid) in ('podcast')
        )  -- to take all podcast tracks ONLY
        -- and client_country_code in ("US")
        -- where created_on between '2021-06-01' and '2021-06-30'
    group by 1, 2, 3, 4, 5
)
group by 1, 2, 3
order by date desc
Vendor details of APP cast tracks:
-- Vendor details of app-cast tracks: each app cast is mapped to its show via
-- tm_podcast_seasons, and each underlying track is mapped to its vendor.
select
    show_id,
    show_title,
    app_cast_id,
    a.track_id,
    vendor_id,
    vendor_name,
    duration,
    song_title,
    track_order,
    created_on,
    is_active
from (
    select
        j.show_id as show_id,
        k.title as show_title,
        app_cast_id,
        track_id,
        duration,
        track_title,
        track_order,
        created_on,
        is_active
        -- j.tid as episode_id, i.title as episode_name,
    from (
        select app_cast_id, track_id, duration, track_title, track_order, created_on, is_active
        from `music.tm_app_cast`
        -- where lower(sapid) in ('podcast')
    ) i
    join (
        select distinct show_id, safe_cast(track_id as int64) as tid, season
        from `music.tm_podcast_seasons`
    ) j
        on safe_cast(i.app_cast_id as int64) = j.tid
    left join (
        select id, title
        from `music.tm_podcasts_show`
    ) k
        on safe_cast(j.show_id as int64) = k.id
    -- where show_id in ("1947")
) a
left join (
    -- Track -> vendor lookup; only the columns used by the join are read.
    select
        i.id as track_id,
        i.title as song_title,
        j.name as vendor_name,
        j.id as vendor_id
        -- j.tid as episode_id, i.title as episode_name,
    from (
        select id, title, vendor_id
        from `music.tm_track`  -- where title in ("Baghon Mein Bahar Hai")
    ) i
    join (
        select id, name
        from `music.tm_vendor`
    ) j
        on safe_cast(i.vendor_id as int64) = j.id
) b
    on safe_cast(a.track_id as int64) = b.track_id
-- Grouping on every output column de-duplicates the joined rows.
group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
Music and Podcast artist count status wise:
-- Podcast shows whose vendor is "Gaana Promo", with migration flag, link and status.
select
    id,
    title,
    vendor_id,
    vendor_name,
    is_migrated,
    link,
    status
from `music.tm_podcasts_show`
where vendor_name = "Gaana Promo"
id,name,dob,gender,num_songs,num_albums,languages,popall,total_favourite_count,
select distinct (status), count (status)

-- Number of artists per (podcast/music artist) x status bucket.
-- NOTE(review): labels look like A = active, D = disabled, De = deleted and
-- P/M = podcast/music; confirm the meaning of status "1", "0", "-2".
select
    Artist_status,
    -- count(*) rather than count(Artist_status): counting the label itself
    -- reports 0 for artists whose flags match none of the branches.
    count(*) as artist_count
from (
    select
        case
            when is_podcast_artist = "1" and status = "1" then 'A_P_a'
            when is_podcast_artist = "0" and status = "1" then 'A_M_a'
            when is_podcast_artist = "1" and status = "0" then 'D_P_a'
            when is_podcast_artist = "0" and status = "0" then 'D_M_a'
            when is_podcast_artist = "1" and status = "-2" then 'De_P_a'
            when is_podcast_artist = "0" and status = "-2" then 'De_M_a'
            else 'Other'  -- any flag/status combination not listed above
        end as Artist_status
    from `music.tm_artist`
)
group by 1
-- Number of artists whose status is "0".
select count(status)
from `music.tm_artist`
where status = "0"

Show and episode level artist:


-- Show-level and episode-level artist lists for the selected show(s).
-- The two artists_list columns are aliased separately because BigQuery rejects
-- duplicate column names in a final result.
select
    j.show_id as show_id,
    k.title as show_title,
    k.artists_list as show_artists_list,
    i.id as episode_id,
    i.title as episode_name,
    i.artists_list as episode_artists_list,
    release_date,
    j.album_id,
    j.season
from (
    select id, title, seokey, album, albumseokey, artists_list
    from `music.tm_track`
    where lower(sapid) in ('podcast')
) i
join (
    select distinct show_id, safe_cast(track_id as int64) as tid, season, album_id
    from `music.tm_podcast_seasons`
) j
    on i.id = j.tid
left join (
    select id, title, artists_list, date(timestamp(created_on)) as release_date
    from `music.tm_podcasts_show`
) k
    on safe_cast(j.show_id as int64) = k.id
where j.show_id in ("1947")
Consumption section wise:
-- Daily podcast consumption per show language (users, plays, hours),
-- intended to be restricted to a chosen set of player sections.
select
    language,
    date,
    -- show_title,
    count(distinct device_id) as total_users,
    sum(plays) as total_plays,
    round((sum(duration) / 3600), 2) as total_duration_Hrs
from (
    select
        safe_cast(timestamp(time_played_on) as date) as date,
        device_id,
        track_id,
        section_id,
        sum(cast(duration as float64)) as duration,
        count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    where (_table_suffix between format_date('%Y%m%d', date('2021-09-12'))
                             and format_date('%Y%m%d', date('2021-09-26')))
        -- TODO: supply the section ids to report on and enable this filter.
        -- An empty list, `in ()`, is a syntax error, so it is left disabled here.
        -- and section_id in (<section ids>)
        -- and cast(duration as float64) > 15  -- to take only those plays above the duration threshold (original note said 30s)
        and track_id in (
            select id
            from `music.tm_track`
            where lower(sapid) in ('podcast')
        )  -- to take all podcast tracks ONLY
    group by 1, 2, 3, 4
) a
join (
    select
        j.show_id as show_id,
        k.title as show_title,
        language,
        -- categories, release_date,
        i.id as episode_id,
        status
        -- i.title as episode_name,
    from (
        select id, title
        from `music.tm_track`
        where lower(sapid) in ('podcast')
        -- and id in (34602208,38068155,37453797,33857609,37919496,37919513)
    ) i
    inner join (
        select distinct show_id, safe_cast(track_id as int64) as tid, season, status
        from `music.tm_podcast_seasons`
    ) j
        on i.id = j.tid
    inner join (
        select
            id,
            title,
            trim(lower(language)) as language
            -- , categories, date(timestamp(created_on)) as release_date
        from `music.tm_podcasts_show`
        -- where title in ("Ansuna Sach with Jay Alani","Kahaani Rooh Ki","Kahanibaaz","Nanhi Chudail",
        -- "Shayad","Ajeeb Dastaan","Behad","Dil Se Pallavi Ke Saath","Ek Rang Ishq Ka","Love Aaj Kal")
    ) k
        on safe_cast(j.show_id as int64) = k.id
) b
    on safe_cast(a.track_id as int64) = b.episode_id
-- Both non-aggregated output columns (language, date) must be grouped.
group by 1, 2
order by date
Demographic consumption:
-- Consumption of Hubhopper-vendor podcast episodes by logged-in listeners,
-- broken down by gender and birth-year bucket.
-- NOTE(review): the labels read 'Below 30' / 'Above 30' while the cut-off is
-- yob 1997; confirm the intended threshold. Rows with a non-numeric or missing
-- yob get a NULL Age bucket.
select
    b.gender,
    case
        when safe_cast(b.yob as int64) >= 1997 then 'Below 30'
        when safe_cast(b.yob as int64) < 1997 then 'Above 30'
    end as Age,
    count(distinct a.device_id) as total_users,
    sum(a.plays) as total_plays,
    round((sum(a.duration) / 60), 2) as total_duration_mins
from (
    select
        user_id,
        device_id,
        sum(duration) as duration,
        count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    -- select date range as required
    where (_table_suffix between format_date('%Y%m%d', date('2021-05-01'))
                             and format_date('%Y%m%d', date('2021-05-31')))
        -- Keep only episodes that belong to a show owned by the vendor.
        and track_id in (
            select j.tid as track_id
            from (
                select distinct show_id, safe_cast(track_id as int64) as tid, season
                from `music.tm_podcast_seasons`
            ) j
            inner join (
                select id
                from `music.tm_podcasts_show`
                where vendor_name in ("Hubhopper")
            ) k
                on safe_cast(j.show_id as int64) = k.id
        )
    group by 1, 2
) a
join (
    select user_id, gender, yob
    from `music.tm_user_details`
) b
    on safe_cast(a.user_id as int64) = safe_cast(b.user_id as int64)
group by 1, 2
Show/Episode wise completion rate:
-- Show/episode-wise completion rate for the selected show(s).
-- listen_com averages, over devices, the listened time as a percentage of the
-- episode's catalogue duration. Listened time is summed across all of a
-- device's plays, so a value above 100 means repeat listening.
select
    show_id,
    show_title,
    episode_id,
    episode_name,
    -- safe_divide yields NULL (ignored by avg) when the catalogue duration is
    -- 0 or not numeric, instead of failing the query with a division by zero.
    avg(safe_divide(cast(duration as float64) * 100,
                    safe_cast(song_duration as float64))) as listen_com,
    count(distinct device_id) as total_users,
    sum(plays) as total_plays,
    round((sum(duration) / 60), 2) as total_duration_mins
from (
    select
        track_id as episode_ids,
        device_id,
        count(1) as plays,
        sum(duration) as duration
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    where (_table_suffix between format_date('%Y%m%d', date('2021-05-01'))
                             and format_date('%Y%m%d', date('2021-06-29')))
        and duration > 60  -- to take only those plays where duration > 60s
        and track_id in (
            select id
            from `music.tm_track`
            where lower(sapid) in ('podcast')
        )  -- to take all podcast tracks ONLY
    group by 1, 2
) a
join (
    select
        j.show_id as show_id,
        k.title as show_title,
        i.duration as song_duration,
        release_date,
        j.tid as episode_id,
        i.title as episode_name
    from (
        select id, title, duration
        from `music.tm_track`
        where lower(sapid) in ('podcast')
    ) i
    join (
        select distinct show_id, safe_cast(track_id as int64) as tid, season
        from `music.tm_podcast_seasons`
    ) j
        on i.id = j.tid
    left join (
        select id, title, date(timestamp(created_on)) as release_date
        from `music.tm_podcasts_show`
    ) k
        on safe_cast(j.show_id as int64) = k.id
) b
    on a.episode_ids = b.episode_id
where show_id in ("23681")
group by 1, 2, 3, 4
Show wise consumption gaana_device_ids:
-- Per-day, per-device consumption of one show (id 16714), with the device's
-- advertising id (idfa) attached where one is known.
with episode_plays as (
    -- Podcast plays collapsed to one row per (day, device, track).
    select
        date(timestamp(time_played_on)) as date,
        device_id,
        track_id,
        count(*) as plays,
        sum(duration) as duration
    from `music_analytics_archived.ma_track_play_log_detailed_*`
    where (_table_suffix between format_date('%Y%m%d', date('2021-05-01'))
                             and format_date('%Y%m%d', date('2021-05-31')))
        and track_id in (
            select id
            from `music.tm_track`
            where lower(sapid) in ('podcast')
        )
    group by 1, 2, 3
),
season_episodes as (
    -- Distinct (show, episode, season) mapping.
    select distinct
        show_id,
        cast(track_id as int64) as tid,
        season
    from `music.tm_podcast_seasons`
),
target_show as (
    select
        id,
        title as show_title,
        language,
        categories
    from `music.tm_podcasts_show`
    where id = 16714
),
show_consumption as (
    select
        ep.date,
        ep.device_id,
        ts.show_title,
        se.season,
        ts.language,
        ts.categories,
        sum(ep.plays) as plays,
        sum(ep.duration) / 60 as duration_minutes
    from episode_plays ep
    join season_episodes se
        on ep.track_id = se.tid
    join target_show ts
        on cast(se.show_id as int64) = ts.id
    group by 1, 2, 3, 4, 5, 6
),
device_ad_ids as (
    -- Distinct non-empty advertising ids per hashed device id.
    select distinct
        md5_device_id,
        idfa
    from `gaana_activity_logs.tm_device_settings`
    where idfa is not null
        and idfa != ''
)
select
    sc.*,
    d.idfa as ga_id
from show_consumption sc
left join device_ad_ids d
    on sc.device_id = d.md5_device_id
Listenership trend hourly basis:
-- Listenership of selected shows by 3-hour time-of-day bucket.
-- NOTE(review): the hour is taken from timestamp(time_played_on) as stored;
-- confirm which time zone that column is recorded in.
with hourly_plays as (
    -- Plays longer than 30s, collapsed per (hour of day, device, track).
    select
        extract(hour from timestamp(time_played_on)) as play_hour,
        device_id,
        track_id,
        sum(duration) as duration,
        count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    -- select date range as required
    where (_table_suffix between format_date('%Y%m%d', date('2021-04-01'))
                             and format_date('%Y%m%d', date('2021-04-30')))
        and duration > 30
        and track_id in (
            select id
            from `music.tm_track`
            where lower(sapid) in ('podcast')
        )
    group by 1, 2, 3
),
episode_shows as (
    -- Episode -> show lookup with show-level attributes.
    select
        j.show_id as show_id,
        k.title as show_title,
        language,
        categories,
        release_date,
        j.tid as episode_id,
        i.title as episode_name
    from (
        select id, title
        from `music.tm_track`
        where lower(sapid) in ('podcast')
    ) i
    join (
        select distinct show_id, safe_cast(track_id as int64) as tid, season
        from `music.tm_podcast_seasons`
    ) j
        on i.id = j.tid
    left join (
        select
            id,
            title,
            trim(lower(language)) as language,
            categories,
            date(timestamp(created_on)) as release_date
        from `music.tm_podcasts_show`
    ) k
        on safe_cast(j.show_id as int64) = k.id
)
select
    show_id,
    show_title,
    categories,
    language,
    release_date,
    -- play_hour is an integer hour, so inclusive ranges cover each 3-hour slot.
    case
        when play_hour between 0 and 2 then '0to3'
        when play_hour between 3 and 5 then '3to6'
        when play_hour between 6 and 8 then '6to9'
        when play_hour between 9 and 11 then '9to12'
        when play_hour between 12 and 14 then '12to15'
        when play_hour between 15 and 17 then '15to18'
        when play_hour between 18 and 20 then '18to21'
        when play_hour between 21 and 23 then '21to24'
    end as time_bucket,
    count(distinct device_id) as total_users,
    sum(plays) as total_plays,
    round((sum(duration) / 60), 2) as total_duration_mins
from hourly_plays a
join episode_shows b
    on a.track_id = b.episode_id
where show_id in ("18043", "14311", "6466", "16362", "4173", "6474", "3235", "4217")
group by 1, 2, 3, 4, 5, 6
order by 1, 2, 3, 4, 5, 6
Tag/Category wise section wise consumption:
-- Daily consumption per tag ("Horror", "Romance") for plays that started from
-- the listed sections. Plays are first restricted to episodes of shows that
-- carry one of the tags, then joined to the (episode, tag) lookup; a show that
-- carries both tags contributes to both tag rows.
select
    -- show_id, show_title, status, language, categories, release_date,
    date,
    tag_val_name,
    count(distinct device_id) as total_users,
    sum(plays) as total_plays,
    round((sum(duration) / 60), 2) as total_duration_mins
from (
    select
        safe_cast(timestamp(time_played_on) as date) as date,
        device_id,
        track_id,
        sum(duration) as duration,
        count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    -- select date range as required
    where (_table_suffix between format_date('%Y%m%d', date('2021-09-27'))
                             and format_date('%Y%m%d', date('2021-10-05')))
        and section_id in (21006, 4992)
        and track_id in (
            select j.tid as track_id
            from (
                select id, title
                from `music.tm_track`
                where lower(sapid) in ('podcast')
            ) i
            join (
                select distinct show_id, safe_cast(track_id as int64) as tid, season
                from `music.tm_podcast_seasons`
            ) j
                on i.id = j.tid
            join (
                select distinct (podcast_show_id)
                from `music.podcast_show_tags`
                where safe_cast(tag_id as int64) in (
                    select id
                    from `music.tm_tag_val_master`
                    where tag_val_name in ("Horror", "Romance")  -- and podcast_show_id in ("2924")
                        and is_category in ("0")
                        and is_available in ("2", "3")
                        and is_active in ("1")
                )
            ) k
                on j.show_id = k.podcast_show_id
            -- where categories like ("%Business%") or categories like ("%Technology%")
            -- or show_id in (select distinct(podcast_show_id) from music.podcast_show_tags where tag_id in ("1228","1554"))
        )
    group by 1, 2, 3
) a
join (
    -- One row per (show, tag, episode) for shows carrying the selected tags.
    select distinct
        (j.show_id) as show_id,
        k.title as show_title,
        tag_val_name,
        status,
        language,
        categories,
        release_date,
        j.tid as episode_id
    from (
        select id, title
        from `music.tm_track`
        where lower(sapid) in ('podcast')
    ) i
    inner join (
        select show_id, safe_cast(track_id as int64) as tid, season
        from `music.tm_podcast_seasons`
    ) j
        on i.id = j.tid
    inner join (
        select id, title, tag_val_name, language, release_date, status, categories
        from (
            select
                id,
                title,
                trim(lower(language)) as language,
                categories,
                date(timestamp(created_on)) as release_date,
                status
            from `music.tm_podcasts_show`
        ) n
        inner join (
            select podcast_show_id, tag_val_name
            from (
                select tag_id, podcast_show_id
                from `music.podcast_show_tags`
            ) e
            inner join (
                select id, tag_val_name
                from `music.tm_tag_val_master`
                where tag_val_name in ("Horror", "Romance")
                    and is_category in ("0")
                    and is_available in ("2", "3")
                    and is_active in ("1")
            ) f
                on safe_cast(e.tag_id as int64) = f.id
        ) m
            on n.id = safe_cast(m.podcast_show_id as int64)
    ) k
        on safe_cast(j.show_id as int64) = k.id
) b
    on safe_cast(a.track_id as int64) = b.episode_id
-- where show_id in ("2429")
group by 1, 2
-- order by total_duration_mins desc
Section wise consumption data:
-- Daily podcast consumption for one player section (users, plays, minutes).
select
    section_id,
    section_name,
    date,
    count(distinct device_id) as total_users,
    sum(plays) as total_plays,
    round((sum(duration) / 60), 2) as total_duration_mins
from (
    select
        cast(timestamp(time_played_on) as date) as date,
        device_id,
        section_id,
        sum(duration) as duration,
        count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    -- select date range as required
    where (_table_suffix between format_date('%Y%m%d', date('2021-04-01'))
                             and format_date('%Y%m%d', date('2021-04-29')))
        and duration > 30  -- to take only those plays where duration > 30s
        and track_id in (
            select id
            from `music.tm_track`
            where lower(sapid) in ('podcast')
        )  -- to take all podcast tracks ONLY
    group by 1, 2, 3
) a
join (
    -- Section id -> display name lookup.
    select id, name as section_name
    from `music_analytics.tm_sections`
    -- where lower(sapid) in ('podcast')
) b
    on a.section_id = b.id
where section_id = 21014
group by 1, 2, 3
order by 1, 2, 3, 4, 5, 6

Page/section filter consumption trend:


-- Daily show-level consumption for plays that came from a specific
-- section (242) and set of pages (1, 984).
with daily_plays as (
    -- Podcast plays collapsed to one row per (day, device, track).
    select
        safe_cast(timestamp(time_played_on) as date) as date,
        device_id,
        track_id,
        sum(cast(duration as float64)) as duration,
        count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    -- select date range as required
    where (_table_suffix between format_date('%Y%m%d', date('2021-07-01'))
                             and format_date('%Y%m%d', date('2021-07-01')))
        and section_id = 242
        and page_id in (1, 984)
        and track_id in (
            select id
            from `music.tm_track`
            where lower(sapid) in ('podcast')
        )
    group by 1, 2, 3
),
episode_shows as (
    -- Episode -> show lookup with show-level attributes.
    select
        j.show_id as show_id,
        k.title as show_title,
        language,
        categories,
        release_date,
        j.tid as episode_id,
        i.title as episode_name
    from (
        select id, title
        from `music.tm_track`
        where lower(sapid) in ('podcast')
    ) i
    join (
        select distinct show_id, safe_cast(track_id as int64) as tid, season
        from `music.tm_podcast_seasons`
    ) j
        on i.id = j.tid
    left join (
        select
            id,
            title,
            trim(lower(language)) as language,
            categories,
            date(timestamp(created_on)) as release_date
        from `music.tm_podcasts_show`
    ) k
        on safe_cast(j.show_id as int64) = k.id
)
select
    show_id,
    show_title,
    language,
    date,
    count(distinct device_id) as total_users,
    sum(plays) as total_plays,
    round((sum(duration) / 60), 2) as total_duration_mins
from daily_plays a
join episode_shows b
    on safe_cast(a.track_id as int64) = b.episode_id
-- where show_id in ("62531")
group by 1, 2, 3, 4

Top 500 Music artist playwise:


-- Top 500 music (non-podcast) artists by total plays.
-- Each track's comma-separated `artists` field is exploded so that a play is
-- credited to every artist listed on the track.
-- NOTE(review): ids that do not cast cleanly to int64 (for example values with
-- surrounding spaces) are dropped by the artist join; confirm the list format.
select
    -- track_id, Track_name,
    -- count(distinct device_id) as total_users,
    Artist_id,
    Artist_name,
    sum(plays) as total_plays
    -- round((sum(duration) / 60), 2) as total_duration_mins
from (
    select
        track_id as episode_ids,
        -- device_id,
        count(1) as plays,
        sum(duration) as duration
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    where (_table_suffix between format_date('%Y%m%d', date('2021-07-01'))
                             and format_date('%Y%m%d', date('2021-07-31')))
        -- and duration > 60  -- to take only those plays above the duration threshold
        and track_id not in (
            select id
            from `music.tm_track`
            where lower(sapid) in ('podcast')
        )  -- to EXCLUDE all podcast tracks (music only)
    group by 1
) a
join (
    select track_id, Track_name, Artist_id, Artist_name
    from (
        -- One row per (track, artist id).
        select
            id as track_id,
            title as Track_name,
            Artist_id
        from (
            select
                id,
                title,
                duration,
                split(artists, ',') as artists
                -- artists_list
            from `music.tm_track`  -- where id in (19818795,19826291)
        ) t
        cross join unnest(t.artists) as Artist_id
        -- where artists_list like '%Arijit Singh%'
    ) c
    join (
        select id, name as Artist_name
        from music.tm_artist
    ) d
        on safe_cast(c.Artist_id as int64) = d.id
) b
    on a.episode_ids = b.track_id
group by 1, 2
order by total_plays desc
-- The report is a top-500 ranking; the previous `limit 1` returned only the
-- single most played artist.
limit 500

US listenership data:
-- US listenership: per-device plays and minutes for each episode of the
-- selected show(s), restricted to plays logged with client_country_code "US".
select
    -- show_id,
    show_title,
    episode_name,
    -- section_id, songtime_foreground, songtime_player, time_played_on,
    -- categories, language, release_date, date,
    device_id,
    -- type,
    -- client_country_code,
    -- count(client_country_code) as no_of_plays_in_each_country,
    -- count(distinct device_id) as total_users,
    sum(plays) as total_plays,
    round((sum(duration) / 60), 2) as total_duration_mins
from (
    select
        -- safe_cast(timestamp(time_played_on) as date) as date,
        -- time_played_on, section_id, songtime_foreground, songtime_player,
        device_id,
        cast(track_id as int64) as track_id,
        -- client_country_code,
        sum(duration) as duration,
        -- 'online' as type,
        count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    where (_table_suffix between format_date('%Y%m%d', date('2021-01-01'))
                             and format_date('%Y%m%d', date('2021-01-31')))
        -- and duration > 30  -- to take only those plays where duration > 30s
        and safe_cast(track_id as int64) in (
            select id
            from `music.tm_track`
            where lower(sapid) in ('podcast')
        )  -- to take all podcast tracks ONLY
        and client_country_code in ("US")
    group by 1, 2
) a
join (
    select
        j.show_id as show_id,
        k.title as show_title,
        language,
        categories,
        release_date,
        status,
        j.tid as episode_id,
        i.title as episode_name
    from (
        select id, title
        from `music.tm_track`
        where lower(sapid) in ('podcast')
    ) i
    join (
        select distinct show_id, safe_cast(track_id as int64) as tid, season
        from `music.tm_podcast_seasons`
    ) j
        on i.id = j.tid
    left join (
        select
            id,
            title,
            trim(lower(language)) as language,
            categories,
            date(timestamp(created_on)) as release_date,
            status
        from `music.tm_podcasts_show`
    ) k
        on safe_cast(j.show_id as int64) = k.id
) b
    on a.track_id = b.episode_id
where show_id in ("1947")
-- All three non-aggregated output columns must be grouped.
group by 1, 2, 3
Podcast Scoring 3.5 month overall performance

--Show level
-- Show-level podcast scorecard for the window [start_date_1, end_date_1].
-- Output: one row per show with listener/play/listening-time totals (X), plus
--   mpu             = listening time / (60 * distinct devices); presumably minutes per user
--   completion_perc = listening time / summed track length (a ratio, not multiplied by 100)
--   followers_past_week / total_followers from the favourites tables (Y, Z)
--   age_on_lastday  = days between the show's release_date and end_date_1

-- Reporting window; both bounds are used as inclusive table-suffix dates.
Declare start_date_1,end_date_1 DATE;


SET (start_date_1,end_date_1) = ('2021-01-01','2021-04-15');

select X.*,
round(overall_listening_time/(60*total_users),2) as mpu,
round(overall_listening_time/total_tracks_length,2) as completion_perc,
followers_past_week as followers_past_week,
total_podcast_show_followers as total_followers,
date_diff(end_date_1,release_date,day) as age_on_lastday

from(
-- X: per-show totals built from per-(device, track) play aggregates.
select
show_id, show_title, categories, language, show_length, show_episode_count,
release_date, --popall, pop30days, pop7days,
count(distinct device_id) as total_users,
sum(overall_plays) as overall_plays,
sum(overall_listening_time) as overall_listening_time,
sum(track_length) as total_tracks_length

from (
-- a: plays longer than 30 of podcast tracks with a positive catalogue length,
-- collapsed to one row per (device, track, track_length).
select device_id, track_id,q.track_length,
sum(duration) as overall_listening_time,
count(1) as overall_plays
from `music_analytics_archived.ma_track_play_log_detailed_*` p
join (select DISTINCT id,safe_cast(duration as int64) as track_length from `music.tm_track`
where lower(sapid) in ('podcast')) q on p.track_id = q.id

where (_table_suffix between format_date('%Y%m%d', date(start_date_1)) and


format_date('%Y%m%d', date(end_date_1)))
and track_length>0
and duration>30
--and track_id in (select id from `music.tm_track` where lower(sapid) in ('podcast'))
group by 1,2,3
)a
join
(
-- b: episode -> show lookup; the window functions attach each show's episode
-- count and summed episode length to every one of its episode rows.
select j.show_id as show_id, k.title as show_title,language, categories, release_date,
j.tid as episode_id, i.title as episode_name, i.track_length as episode_length,
count(j.tid) over(partition by show_id) as show_episode_count,
sum(i.track_length) over(partition by show_id) as show_length,
--sum(cast(popall as int64)) over(partition by show_id) as popall,
--sum(cast(pop30days as int64)) over(partition by show_id) as pop30days,
--sum(cast(pop7days as int64)) over(partition by show_id) as pop7days,
from (
select id, title, safe_cast(duration as int64) as track_length--, popall, pop30days, pop7days
from `music.tm_track`
where lower(sapid) in ('podcast')
)i
join
(
select distinct show_id, safe_cast(track_id as int64) as tid, season
from `music.tm_podcast_seasons`
)j
on i.id = j.tid
left join (
select id, title, trim(lower(language)) as language, categories,date(timestamp(created_on)) as
release_date
from `music.tm_podcasts_show`
)k
on safe_cast(j.show_id as int64) = k.id
)b
on a.track_id = b.episode_id
-- Keep shows with a non-zero total length that were released before the window end.
where b.show_length <> 0 and b.release_date < end_date_1
group by 1,2,3,4,5,6,7
)X

--NEW FOLLOWERS
-- Y: follows created from 7 days to 1 day before end_date_1 (inclusive),
-- summed across logged-in and non-logged-in sources.
left join
(SELECT entity_id,
sum(podcast_followers) as followers_past_week,
sum(case when type = 'loggedin' then podcast_followers end) as
podcast_show_followers_loggedin,
sum(case when type = 'non-loggedin' then podcast_followers end) as
podcast_show_followers_non_loggedin
from
((
SELECT entity_id, count(*) as podcast_followers, 'loggedin' as type
FROM `gaana_favourite.tm_user_podcast_*`
WHERE entity_status = '2' and date(timestamp(created_on)) between
date_add(end_date_1,INTERVAL -7 DAY) and date_add(end_date_1,INTERVAL -1 DAY)
group by 1
)
union all
(
SELECT entity_id, count(*) as podcast_followers, 'non-loggedin' as type
FROM `gaana_favourite.tm_device_podcast_*`
WHERE entity_status = '2' and sync_status = '0' and date(timestamp(created_on)) between
date_add(end_date_1,INTERVAL -7 DAY) and date_add(end_date_1,INTERVAL -1 DAY)
group by 1
))
group by 1
)Y
on X.show_id = Y.entity_id

--TOTAL FOLLOWERS
-- Z: all-time follows per entity, with no date filter. Shows with no match in
-- Y or Z get NULL follower values because both are left joins.
left join
(SELECT entity_id,
sum(podcast_followers) as total_podcast_show_followers,
from
((
SELECT entity_id, count(*) as podcast_followers, 'loggedin' as type
FROM `gaana_favourite.tm_user_podcast_*`
WHERE entity_status = '2'
group by 1
)
union all
(
SELECT entity_id, count(*) as podcast_followers, 'non-loggedin' as type
FROM `gaana_favourite.tm_device_podcast_*`
WHERE entity_status = '2' and sync_status = '0'
group by 1
))
group by 1
)Z
on X.show_id = Z.entity_id

--Show-season level
--Podcast all users all plays performance
-- Same scorecard as the show-level version, but one row per (show, season).
-- Episode counts, lengths and popularity sums are partitioned by show and
-- season, and age_on_lastday is measured from the season's first episode date.
-- Follower figures (Y, Z) are joined on show_id only, so every season of a
-- show carries the same follower numbers.
Declare start_date_1,end_date_1 DATE;
SET (start_date_1,end_date_1) = ('2021-01-01','2021-04-15');

select X.*,
round(overall_listening_time/(60*total_users),2) as mpu,
round(overall_listening_time/total_tracks_length,2) as completion_perc,
followers_past_week as followers_past_week,
total_podcast_show_followers as total_followers,
date_diff(end_date_1,season_start,day) as age_on_lastday

from(
-- X: per-(show, season) totals built from per-(device, track) play aggregates.
select
show_id, show_title, season, categories, language, show_length, show_episode_count,
season_start, popall, pop30days, pop7days,
count(distinct device_id) as total_users,
sum(overall_plays) as overall_plays,
sum(overall_listening_time) as overall_listening_time,
sum(track_length) as total_tracks_length

from (
-- a: plays longer than 30 of podcast tracks with a positive catalogue length,
-- collapsed to one row per (device, track, track_length).
select device_id, track_id,q.track_length,
sum(duration) as overall_listening_time,
count(1) as overall_plays
from `music_analytics_archived.ma_track_play_log_detailed_*` p
join (select DISTINCT id,safe_cast(duration as int64) as track_length from `music.tm_track`
where lower(sapid) in ('podcast')) q on p.track_id = q.id

where (_table_suffix between format_date('%Y%m%d', date(start_date_1)) and


format_date('%Y%m%d', date(end_date_1)))
and track_length>0 and duration>30
--and track_id in (select id from `music.tm_track` where lower(sapid) in ('podcast'))
group by 1,2,3
)a
join
(
-- b: episode -> (show, season) lookup; the window functions attach per-season
-- episode count, summed length and summed popularity counters to each episode row.
select j.tid as episode_id, i.title as episode_name, i.track_length as episode_length,
j.show_id as show_id, k.title as show_title, season, language, categories, season_start,
count(j.tid) over(partition by show_id,season) as show_episode_count,
sum(i.track_length) over(partition by show_id,season) as show_length,
sum(cast(popall as int64)) over(partition by show_id,season) as popall,
sum(cast(pop30days as int64)) over(partition by show_id,season) as pop30days,
sum(cast(pop7days as int64)) over(partition by show_id,season) as pop7days,
from (
select id, title, safe_cast(duration as int64) as track_length, popall, pop30days, pop7days
from `music.tm_track`
where lower(sapid) in ('podcast')
)i
join
(
-- season_start: earliest mapping created_on date within the (show, season).
select distinct show_id,tid,season,MIN(release_date) Over (partition by show_id,season order
by release_date) as season_start from (
select distinct show_id, safe_cast(track_id as int64) as tid, season,
date(timestamp(created_on)) as release_date
from `music.tm_podcast_seasons` )
)j
on i.id = j.tid
left join (
select id, title, trim(lower(language)) as language, categories
from `music.tm_podcasts_show`) k
on safe_cast(j.show_id as int64) = k.id
)b
on a.track_id = b.episode_id
-- Keep seasons with a non-zero total length that started before the window end.
where b.show_length <> 0 and b.season_start < end_date_1
group by 1,2,3,4,5,6,7,8,9,10,11
)X

--NEW FOLLOWERS
-- Y: follows created from 7 days to 1 day before end_date_1 (inclusive),
-- summed across logged-in and non-logged-in sources.
left join
(SELECT entity_id,
sum(podcast_followers) as followers_past_week,
sum(case when type = 'loggedin' then podcast_followers end) as
podcast_show_followers_loggedin,
sum(case when type = 'non-loggedin' then podcast_followers end) as
podcast_show_followers_non_loggedin
from
((
SELECT entity_id, count(*) as podcast_followers, 'loggedin' as type
FROM `gaana_favourite.tm_user_podcast_*`
WHERE entity_status = '2' and date(timestamp(created_on)) between
date_add(end_date_1,INTERVAL -7 DAY) and date_add(end_date_1,INTERVAL -1 DAY)
group by 1
)
union all
(
SELECT entity_id, count(*) as podcast_followers, 'non-loggedin' as type
FROM `gaana_favourite.tm_device_podcast_*`
WHERE entity_status = '2' and sync_status = '0' and date(timestamp(created_on)) between
date_add(end_date_1,INTERVAL -7 DAY) and date_add(end_date_1,INTERVAL -1 DAY)
group by 1
))
group by 1
)Y
on X.show_id = Y.entity_id

--TOTAL FOLLOWERS
-- Z: all-time follows per entity, with no date filter. Rows with no match in
-- Y or Z get NULL follower values because both are left joins.
left join
(SELECT entity_id,
sum(podcast_followers) as total_podcast_show_followers,
from
((
SELECT entity_id, count(*) as podcast_followers, 'loggedin' as type
FROM `gaana_favourite.tm_user_podcast_*`
WHERE entity_status = '2'
group by 1
)
union all
(
SELECT entity_id, count(*) as podcast_followers, 'non-loggedin' as type
FROM `gaana_favourite.tm_device_podcast_*`
WHERE entity_status = '2' and sync_status = '0'
group by 1
))
group by 1
)Z
on X.show_id = Z.entity_id

--Podcast all users all plays performance

-- Podcast performance per show and season over every play logged between
-- 2021-01-01 and 2021-04-13, limited to seasons that started at least one
-- month before the run date, with follower counts attached per show.
select
  X.*,
  -- listening time per user divided by 60 (minutes per user if duration is stored in seconds -- TODO confirm unit)
  round(overall_listening_time / (60 * total_users), 2) as mpu,
  -- average listening time per user as a fraction of the show's total episode length
  round(overall_listening_time / (total_users * show_length), 2) as completion_perc,
  -- shows with no follower row in the window report 0 rather than NULL
  coalesce(followers_past_week, 0) as followers_past_week,
  total_podcast_show_followers as total_followers
from (
  select
    show_id, show_title, season, categories, language, show_length, show_episode_count,
    season_start, popall, pop30days, pop7days,
    count(distinct device_id) as total_users,
    sum(overall_plays) as overall_plays,
    sum(overall_listening_time) as overall_listening_time
  from (
    -- plays and listening time per device and episode
    select
      device_id,
      track_id,
      sum(duration) as overall_listening_time,
      count(1) as overall_plays
    from `music_analytics_archived.ma_track_play_log_detailed_*`
    where _table_suffix between format_date('%Y%m%d', date('2021-01-01'))
                            and format_date('%Y%m%d', date('2021-04-13'))
      and track_id in (select id from `music.tm_track` where lower(sapid) in ('podcast'))
    group by 1, 2
  ) a
  join (
    -- episode metadata; the window totals are per show (all seasons) and
    -- are repeated on every episode row of that show
    select
      j.tid as episode_id,
      i.title as episode_name,
      i.track_length as episode_length,
      j.show_id as show_id,
      k.title as show_title,
      j.season,
      k.language,
      k.categories,
      j.season_start,
      count(j.tid) over (partition by j.show_id) as show_episode_count,
      sum(i.track_length) over (partition by j.show_id) as show_length,
      sum(cast(i.popall as int64)) over (partition by j.show_id) as popall,
      sum(cast(i.pop30days as int64)) over (partition by j.show_id) as pop30days,
      sum(cast(i.pop7days as int64)) over (partition by j.show_id) as pop7days
    from (
      select id, title, safe_cast(duration as int64) as track_length, popall, pop30days, pop7days
      from `music.tm_track`
      where lower(sapid) in ('podcast')
    ) i
    join (
      -- season_start = earliest created_on date within the show/season.
      -- No ORDER BY in the window: the minimum must cover the whole
      -- partition, including rows whose own date is NULL.
      select distinct
        show_id,
        tid,
        season,
        min(release_date) over (partition by show_id, season) as season_start
      from (
        select distinct
          show_id,
          safe_cast(track_id as int64) as tid,
          season,
          date(timestamp(created_on)) as release_date
        from `music.tm_podcast_seasons`
      )
    ) j
      on i.id = j.tid
    left join (
      select id, title, trim(lower(language)) as language, categories
      from `music.tm_podcasts_show`
    ) k
      on safe_cast(j.show_id as int64) = k.id
  ) b
    on a.track_id = b.episode_id
  where b.show_length <> 0
    and b.season_start <= date_add(current_date(), interval -1 month)
  group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
) X

-- NEW FOLLOWERS: follow rows created in the 7 days ending yesterday
-- (entity_status = '2' presumably marks an active follow -- confirm)
left join (
  select
    entity_id,
    sum(podcast_followers) as followers_past_week,
    sum(case when type = 'loggedin' then podcast_followers end) as podcast_show_followers_loggedin,
    sum(case when type = 'non-loggedin' then podcast_followers end) as podcast_show_followers_non_loggedin
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
        and date(timestamp(created_on)) between date_add(current_date(), interval -7 day)
                                            and date_add(current_date(), interval -1 day)
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
        and date(timestamp(created_on)) between date_add(current_date(), interval -7 day)
                                            and date_add(current_date(), interval -1 day)
      group by 1
    )
  )
  group by 1
) Y
  on X.show_id = Y.entity_id

-- TOTAL FOLLOWERS: same sources without the date restriction
left join (
  select
    entity_id,
    sum(podcast_followers) as total_podcast_show_followers
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
      group by 1
    )
  )
  group by 1
) Z
  on X.show_id = Z.entity_id

-- Podcast performance: only plays with a duration above 30

-- Same report as the all-plays version, but only counting play-log rows whose
-- duration exceeds 30 (seconds, presumably -- TODO confirm unit).
-- Window: 2021-01-01 to 2021-04-13; seasons must have started at least one
-- month before the run date.
select
  X.*,
  -- listening time per user divided by 60
  round(overall_listening_time / (60 * total_users), 2) as mpu,
  -- average listening time per user as a fraction of the show's total episode length
  round(overall_listening_time / (total_users * show_length), 2) as completion_perc,
  -- shows with no follower row in the window report 0 rather than NULL
  coalesce(followers_past_week, 0) as followers_past_week,
  total_podcast_show_followers as total_followers
from (
  select
    show_id, show_title, season, categories, language, show_length, show_episode_count,
    season_start, popall, pop30days, pop7days,
    count(distinct device_id) as total_users,
    sum(overall_plays) as overall_plays,
    sum(overall_listening_time) as overall_listening_time
  from (
    -- plays and listening time per device and episode, short plays excluded
    select
      device_id,
      track_id,
      sum(duration) as overall_listening_time,
      count(1) as overall_plays
    from `music_analytics_archived.ma_track_play_log_detailed_*`
    where _table_suffix between format_date('%Y%m%d', date('2021-01-01'))
                            and format_date('%Y%m%d', date('2021-04-13'))
      and track_id in (select id from `music.tm_track` where lower(sapid) in ('podcast'))
      and duration > 30
    group by 1, 2
  ) a
  join (
    -- episode metadata; the window totals are per show (all seasons) and
    -- are repeated on every episode row of that show
    select
      j.tid as episode_id,
      i.title as episode_name,
      i.track_length as episode_length,
      j.show_id as show_id,
      k.title as show_title,
      j.season,
      k.language,
      k.categories,
      j.season_start,
      count(j.tid) over (partition by j.show_id) as show_episode_count,
      sum(i.track_length) over (partition by j.show_id) as show_length,
      sum(cast(i.popall as int64)) over (partition by j.show_id) as popall,
      sum(cast(i.pop30days as int64)) over (partition by j.show_id) as pop30days,
      sum(cast(i.pop7days as int64)) over (partition by j.show_id) as pop7days
    from (
      select id, title, safe_cast(duration as int64) as track_length, popall, pop30days, pop7days
      from `music.tm_track`
      where lower(sapid) in ('podcast')
    ) i
    join (
      -- season_start = earliest created_on date within the show/season.
      -- No ORDER BY in the window: the minimum must cover the whole
      -- partition, including rows whose own date is NULL.
      select distinct
        show_id,
        tid,
        season,
        min(release_date) over (partition by show_id, season) as season_start
      from (
        select distinct
          show_id,
          safe_cast(track_id as int64) as tid,
          season,
          date(timestamp(created_on)) as release_date
        from `music.tm_podcast_seasons`
      )
    ) j
      on i.id = j.tid
    left join (
      select id, title, trim(lower(language)) as language, categories
      from `music.tm_podcasts_show`
    ) k
      on safe_cast(j.show_id as int64) = k.id
  ) b
    on a.track_id = b.episode_id
  where b.show_length <> 0
    and b.season_start <= date_add(current_date(), interval -1 month)
  group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
) X

-- NEW FOLLOWERS: follow rows created in the 7 days ending yesterday
-- (entity_status = '2' presumably marks an active follow -- confirm)
left join (
  select
    entity_id,
    sum(podcast_followers) as followers_past_week,
    sum(case when type = 'loggedin' then podcast_followers end) as podcast_show_followers_loggedin,
    sum(case when type = 'non-loggedin' then podcast_followers end) as podcast_show_followers_non_loggedin
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
        and date(timestamp(created_on)) between date_add(current_date(), interval -7 day)
                                            and date_add(current_date(), interval -1 day)
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
        and date(timestamp(created_on)) between date_add(current_date(), interval -7 day)
                                            and date_add(current_date(), interval -1 day)
      group by 1
    )
  )
  group by 1
) Y
  on X.show_id = Y.entity_id

-- TOTAL FOLLOWERS: same sources without the date restriction
left join (
  select
    entity_id,
    sum(podcast_followers) as total_podcast_show_followers
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
      group by 1
    )
  )
  group by 1
) Z
  on X.show_id = Z.entity_id

Podcasts Exposure Performance


--Considering users only via promoted sections
-- Season-1 podcast performance (2021-04-06 to 2021-04-12) for devices that
-- played an episode through at least one promoted section. For those
-- device/episode pairs the report shows both the promoted-section totals
-- (pm_section_*) and the totals across all sections (overall_*).
select
  X.*,
  round(overall_listening_time / (60 * pm_users), 2) as mpu,
  round(overall_listening_time / (pm_users * show_length), 2) as completion_perc,
  -- shows with no follower row in the window report 0 rather than NULL
  coalesce(podcast_show_followers, 0) as new_followers_overall
from (
  select
    show_id, show_title, categories, language, show_length, show_episode_count,
    release_date, popall, pop30days, pop7days,
    count(distinct device_id) as pm_users,
    sum(overall_plays) as overall_plays,
    sum(overall_listening_time) as overall_listening_time,
    sum(pm_section_plays) as pm_section_plays,
    sum(pm_section_listening_time) as pm_section_listening_time
  from (
    -- Exactly one row per device and episode. The all-section totals are
    -- constant per (device, episode), so they are taken once with max();
    -- summing them per promoted-section row would count them once for every
    -- promoted section the device used.
    select
      p.device_id,
      p.track_id,
      max(p.overall_listening_time) as overall_listening_time,
      max(p.overall_plays) as overall_plays,
      sum(p.pm_section_listening_time) as pm_section_listening_time,
      sum(p.pm_section_plays) as pm_section_plays
    from (
      select
        *,
        -- duration and plays from all sections, carried on every section row
        sum(pm_section_listening_time) over (partition by device_id, track_id) as overall_listening_time,
        sum(pm_section_plays) over (partition by device_id, track_id) as overall_plays
      from (
        select
          device_id,
          track_id,
          section_id,
          sum(duration) as pm_section_listening_time,
          count(1) as pm_section_plays
        from `music_analytics_archived.ma_track_play_log_detailed_*`
        where _table_suffix between format_date('%Y%m%d', date('2021-04-06'))
                                and format_date('%Y%m%d', date('2021-04-12'))
          and track_id in (select id from `music.tm_track` where lower(sapid) in ('podcast'))
        group by 1, 2, 3
      )
    ) p
    join (
      -- distinct guards against a section listed twice multiplying the rows
      select distinct section_id as id
      from `avnish.promoted_sections`
      --where lower(name) like 'search%' or lower(name) like 'homecarousel%'
    ) q
      on p.section_id = q.id
    group by 1, 2
  ) a
  join (
    -- season-1 episode metadata; window totals are per show
    select
      j.tid as episode_id,
      i.title as episode_name,
      i.track_length as episode_length,
      j.show_id as show_id,
      k.title as show_title,
      k.language,
      k.categories,
      k.release_date,
      count(j.tid) over (partition by j.show_id) as show_episode_count,
      sum(i.track_length) over (partition by j.show_id) as show_length,
      sum(cast(i.popall as int64)) over (partition by j.show_id) as popall,
      sum(cast(i.pop30days as int64)) over (partition by j.show_id) as pop30days,
      sum(cast(i.pop7days as int64)) over (partition by j.show_id) as pop7days
    from (
      select id, title, safe_cast(duration as int64) as track_length, popall, pop30days, pop7days
      from `music.tm_track`
      where lower(sapid) in ('podcast')
    ) i
    join (
      select show_id, safe_cast(track_id as int64) as tid, season
      from `music.tm_podcast_seasons`
      where season = '1'
      group by 1, 2, 3
    ) j
      on i.id = j.tid
    left join (
      select
        id,
        title,
        trim(lower(language)) as language,
        categories,
        date(timestamp(created_on)) as release_date
      from `music.tm_podcasts_show`
    ) k
      on safe_cast(j.show_id as int64) = k.id
  ) b
    on a.track_id = b.episode_id
  where b.show_length <> 0
  group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
) X

-- follow rows created inside the reporting window
left join (
  select
    entity_id,
    sum(podcast_followers) as podcast_show_followers,
    sum(case when type = 'loggedin' then podcast_followers end) as podcast_show_followers_loggedin,
    sum(case when type = 'non-loggedin' then podcast_followers end) as podcast_show_followers_non_loggedin
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
        and date(timestamp(created_on)) between date('2021-04-06') and date('2021-04-12')
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
        and date(timestamp(created_on)) between date('2021-04-06') and date('2021-04-12')
      group by 1
    )
  )
  group by 1
) Y
  on X.show_id = Y.entity_id

--Considering All Users


-- Season-1 podcast performance for all users, 2021-04-06 to 2021-04-12,
-- with the number of follow rows created in the same window.
select
  X.*,
  round(overall_listening_time / (60 * total_users), 2) as mpu,
  round(overall_listening_time / (total_users * show_length), 2) as completion_perc,
  -- shows with no follower row in the window report 0 rather than NULL
  coalesce(podcast_show_followers, 0) as new_followers_overall
from (
  select
    show_id, show_title, categories, language, show_length, show_episode_count,
    release_date, popall, pop30days, pop7days,
    count(distinct device_id) as total_users,
    sum(overall_plays) as overall_plays,
    sum(overall_listening_time) as overall_listening_time
  from (
    -- plays and listening time per device and episode
    select
      device_id,
      track_id,
      sum(duration) as overall_listening_time,
      count(1) as overall_plays
    from `music_analytics_archived.ma_track_play_log_detailed_*`
    where _table_suffix between format_date('%Y%m%d', date('2021-04-06'))
                            and format_date('%Y%m%d', date('2021-04-12'))
      and track_id in (select id from `music.tm_track` where lower(sapid) in ('podcast'))
    group by 1, 2
  ) a
  join (
    -- season-1 episode metadata; window totals are per show
    select
      j.tid as episode_id,
      i.title as episode_name,
      i.track_length as episode_length,
      j.show_id as show_id,
      k.title as show_title,
      k.language,
      k.categories,
      k.release_date,
      count(j.tid) over (partition by j.show_id) as show_episode_count,
      sum(i.track_length) over (partition by j.show_id) as show_length,
      sum(cast(i.popall as int64)) over (partition by j.show_id) as popall,
      sum(cast(i.pop30days as int64)) over (partition by j.show_id) as pop30days,
      sum(cast(i.pop7days as int64)) over (partition by j.show_id) as pop7days
    from (
      select id, title, safe_cast(duration as int64) as track_length, popall, pop30days, pop7days
      from `music.tm_track`
      where lower(sapid) in ('podcast')
    ) i
    join (
      select show_id, safe_cast(track_id as int64) as tid, season
      from `music.tm_podcast_seasons`
      where season = '1'
      group by 1, 2, 3
    ) j
      on i.id = j.tid
    left join (
      select
        id,
        title,
        trim(lower(language)) as language,
        categories,
        date(timestamp(created_on)) as release_date
      from `music.tm_podcasts_show`
    ) k
      on safe_cast(j.show_id as int64) = k.id
  ) b
    on a.track_id = b.episode_id
  where b.show_length <> 0
  group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
) X

-- follow rows created inside the reporting window
left join (
  select
    entity_id,
    sum(podcast_followers) as podcast_show_followers,
    sum(case when type = 'loggedin' then podcast_followers end) as podcast_show_followers_loggedin,
    sum(case when type = 'non-loggedin' then podcast_followers end) as podcast_show_followers_non_loggedin
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
        and date(timestamp(created_on)) between date('2021-04-06') and date('2021-04-12')
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
        and date(timestamp(created_on)) between date('2021-04-06') and date('2021-04-12')
      group by 1
    )
  )
  group by 1
) Y
  on X.show_id = Y.entity_id
TRENDING PODCASTS PERFORMANCE QUERY
--Trending Podcasts Show level

-- Trending podcasts, show level.
-- start_date_1 / end_date_1 bound the play-log, follower and like windows;
-- season_start_1 keeps only shows created before that date.
declare start_date_1, end_date_1, season_start_1 date;

set (start_date_1, end_date_1, season_start_1) =
    (date '2021-04-26', date '2021-04-26', date '2021-04-26');

select
  X.*,
  round(overall_listening_time / (60 * total_users), 2) as mpu,
  -- listening time relative to the summed length of the episodes each device played
  round(overall_listening_time / total_tracks_length, 2) as completion_perc,
  coalesce(followers_past_week, 0) as followers_past_week,
  total_podcast_show_followers as total_followers,
  date_diff(end_date_1, release_date, day) / 30 as age_in_months,
  coalesce(likes_past_week, 0) as likes_past_week
from (
  select
    show_id, show_title, categories, language, show_length, show_episode_count,
    release_date,
    count(distinct device_id) as total_users,
    sum(overall_plays) as overall_plays,
    sum(overall_listening_time) as overall_listening_time,
    sum(track_length) as total_tracks_length,
    sum(search_plays) as total_search_plays
  from (
    -- plays per device and episode, excluding the trending section itself
    select
      p.device_id,
      p.track_id,
      q.track_length,
      sum(p.duration) as overall_listening_time,
      count(1) as overall_plays,
      sum(case
            when p.section_id in (select id
                                  from `music_analytics.tm_sections`
                                  where lower(name) like 'searchautosuggest%')
            then 1
            else 0
          end) as search_plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    join (
      select distinct id, safe_cast(duration as int64) as track_length
      from `music.tm_track`
      where lower(sapid) in ('podcast')
    ) q
      on p.track_id = q.id
    where (_table_suffix between format_date('%Y%m%d', date(start_date_1))
                             and format_date('%Y%m%d', date(end_date_1)))
      and q.track_length > 0
      -- Exclude plays from the trending section. Written NULL-safe: a bare
      -- NOT IN would also drop plays whose section_id is NULL, and would drop
      -- every play if the id list contained a NULL.
      and (p.section_id is null
           or p.section_id not in (select id
                                   from `music_analytics.tm_sections`
                                   where lower(name) like 'popularpodcasts'
                                     and id is not null))
      --and duration > 30   -- optional: ignore short plays
    group by 1, 2, 3
  ) a
  join (
    -- episode metadata; window totals are per show
    select
      j.show_id as show_id,
      k.title as show_title,
      k.language,
      k.categories,
      k.release_date,
      j.tid as episode_id,
      i.title as episode_name,
      i.track_length as episode_length,
      count(j.tid) over (partition by j.show_id) as show_episode_count,
      sum(i.track_length) over (partition by j.show_id) as show_length
    from (
      select id, title, safe_cast(duration as int64) as track_length
      from `music.tm_track`
      where lower(sapid) in ('podcast')
    ) i
    join (
      -- one row per show/episode; season is left out so an episode listed
      -- under several seasons is not counted more than once at show level
      select distinct show_id, safe_cast(track_id as int64) as tid
      from `music.tm_podcast_seasons`
    ) j
      on i.id = j.tid
    left join (
      select
        id,
        title,
        trim(lower(language)) as language,
        categories,
        date(timestamp(created_on)) as release_date
      from `music.tm_podcasts_show`
    ) k
      on safe_cast(j.show_id as int64) = k.id
  ) b
    on a.track_id = b.episode_id
  where b.show_length <> 0
    and b.release_date < season_start_1
  group by 1, 2, 3, 4, 5, 6, 7
) X

-- NEW FOLLOWERS: follow rows created between start_date_1 and end_date_1
left join (
  select
    entity_id,
    sum(podcast_followers) as followers_past_week,
    sum(case when type = 'loggedin' then podcast_followers end) as podcast_show_followers_loggedin,
    sum(case when type = 'non-loggedin' then podcast_followers end) as podcast_show_followers_non_loggedin
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
        and date(timestamp(created_on)) between start_date_1 and end_date_1
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
        and date(timestamp(created_on)) between start_date_1 and end_date_1
      group by 1
    )
  )
  group by 1
) Y
  on X.show_id = Y.entity_id

-- TOTAL FOLLOWERS: same sources without the date restriction
left join (
  select
    entity_id,
    sum(podcast_followers) as total_podcast_show_followers
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
      group by 1
    )
  )
  group by 1
) Z
  on X.show_id = Z.entity_id

-- NEW LIKES: episode likes in the window, rolled up to the show
left join (
  select
    maps.show_id,
    sum(track_likes.podcast_likes) as likes_past_week
  from (
    (
      -- NOTE(review): this branch filters entity_status > 1 while the device
      -- branch uses = '2'; confirm the difference is intended
      select entity_id, count(*) as podcast_likes, 'loggedin' as type
      from `gaana_favourite.tm_user_episode_*`
      where cast(entity_status as int64) > 1
        and date(timestamp(created_on)) between start_date_1 and end_date_1
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_likes, 'non-loggedin' as type
      from `gaana_favourite.tm_device_episode_*`
      where entity_status = '2'
        and sync_status = '0'
        and date(timestamp(created_on)) between start_date_1 and end_date_1
      group by 1
    )
  ) track_likes
  join (
    select distinct show_id, safe_cast(track_id as int64) as tid
    from `music.tm_podcast_seasons`
  ) maps
    on cast(track_likes.entity_id as int64) = maps.tid
  group by 1
) AA
  on X.show_id = AA.show_id

--Trending Podcasts Show-season level

-- Trending podcasts, show-season level.
-- start_date_1 / end_date_1 bound the play-log, follower and like windows;
-- season_start_1 keeps only seasons that started before that date.
declare start_date_1, end_date_1, season_start_1 date;

set (start_date_1, end_date_1, season_start_1) =
    (date '2021-04-20', date '2021-04-20', date '2021-04-20');

select
  X.*,
  round(overall_listening_time / (60 * total_users), 2) as mpu,
  -- listening time relative to the summed length of the episodes each device played
  round(overall_listening_time / total_tracks_length, 2) as completion_perc,
  -- shows/seasons with no matching row report 0 rather than NULL
  coalesce(followers_past_week, 0) as followers_past_week,
  total_podcast_show_followers as total_followers,
  date_diff(start_date_1, season_start, day) as age_on_weekstart,
  date_diff(start_date_1, season_start, day) / 30 as age_in_months,
  coalesce(likes_past_week, 0) as likes_past_week
from (
  select
    show_id, show_title, season, categories, language, show_length, show_episode_count,
    season_start, popall, pop30days, pop7days,
    count(distinct device_id) as total_users,
    sum(overall_plays) as overall_plays,
    sum(overall_listening_time) as overall_listening_time,
    sum(track_length) as total_tracks_length,
    sum(search_plays) as total_search_plays
  from (
    -- plays per device and episode, excluding the trending section itself
    select
      p.device_id,
      p.track_id,
      q.track_length,
      sum(p.duration) as overall_listening_time,
      count(1) as overall_plays,
      sum(case
            when p.section_id in (select id
                                  from `music_analytics.tm_sections`
                                  where lower(name) like 'searchautosuggest%')
            then 1
            else 0
          end) as search_plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    join (
      select distinct id, safe_cast(duration as int64) as track_length
      from `music.tm_track`
      where lower(sapid) in ('podcast')
    ) q
      on p.track_id = q.id
    where (_table_suffix between format_date('%Y%m%d', date(start_date_1))
                             and format_date('%Y%m%d', date(end_date_1)))
      and q.track_length > 0
      -- Exclude plays from the trending section. Written NULL-safe: a bare
      -- NOT IN would also drop plays whose section_id is NULL, and would drop
      -- every play if the id list contained a NULL.
      and (p.section_id is null
           or p.section_id not in (select id
                                   from `music_analytics.tm_sections`
                                   where lower(name) like 'popularpodcasts'
                                     and id is not null))
      --and duration > 30   -- optional: ignore short plays
    group by 1, 2, 3
  ) a
  join (
    -- episode metadata; window totals are per show and season
    select
      j.tid as episode_id,
      i.title as episode_name,
      i.track_length as episode_length,
      j.show_id as show_id,
      k.title as show_title,
      j.season,
      k.language,
      k.categories,
      j.season_start,
      count(j.tid) over (partition by j.show_id, j.season) as show_episode_count,
      sum(i.track_length) over (partition by j.show_id, j.season) as show_length,
      sum(cast(i.popall as int64)) over (partition by j.show_id, j.season) as popall,
      sum(cast(i.pop30days as int64)) over (partition by j.show_id, j.season) as pop30days,
      sum(cast(i.pop7days as int64)) over (partition by j.show_id, j.season) as pop7days
    from (
      select id, title, safe_cast(duration as int64) as track_length, popall, pop30days, pop7days
      from `music.tm_track`
      where lower(sapid) in ('podcast')
    ) i
    join (
      -- season_start = earliest created_on date within the show/season.
      -- No ORDER BY in the window: the minimum must cover the whole
      -- partition, including rows whose own date is NULL.
      select distinct
        show_id,
        tid,
        season,
        min(release_date) over (partition by show_id, season) as season_start
      from (
        select distinct
          show_id,
          safe_cast(track_id as int64) as tid,
          season,
          date(timestamp(created_on)) as release_date
        from `music.tm_podcast_seasons`
      )
    ) j
      on i.id = j.tid
    left join (
      select id, title, trim(lower(language)) as language, categories
      from `music.tm_podcasts_show`
    ) k
      on safe_cast(j.show_id as int64) = k.id
  ) b
    on a.track_id = b.episode_id
  where b.show_length <> 0
    and b.season_start < season_start_1
  group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
) X

-- NEW FOLLOWERS: follow rows created between start_date_1 and end_date_1
left join (
  select
    entity_id,
    sum(podcast_followers) as followers_past_week,
    sum(case when type = 'loggedin' then podcast_followers end) as podcast_show_followers_loggedin,
    sum(case when type = 'non-loggedin' then podcast_followers end) as podcast_show_followers_non_loggedin
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
        and date(timestamp(created_on)) between start_date_1 and end_date_1
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
        and date(timestamp(created_on)) between start_date_1 and end_date_1
      group by 1
    )
  )
  group by 1
) Y
  on X.show_id = Y.entity_id

-- TOTAL FOLLOWERS: same sources without the date restriction (show level)
left join (
  select
    entity_id,
    sum(podcast_followers) as total_podcast_show_followers
  from (
    (
      select entity_id, count(*) as podcast_followers, 'loggedin' as type
      from `gaana_favourite.tm_user_podcast_*`
      where entity_status = '2'
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_followers, 'non-loggedin' as type
      from `gaana_favourite.tm_device_podcast_*`
      where entity_status = '2'
        and sync_status = '0'
      group by 1
    )
  )
  group by 1
) Z
  on X.show_id = Z.entity_id

-- NEW LIKES: episode likes in the window, rolled up to show and season
left join (
  select
    maps.show_id,
    maps.season,
    sum(track_likes.podcast_likes) as likes_past_week
  from (
    (
      -- NOTE(review): this branch filters entity_status > 1 while the device
      -- branch uses = '2'; confirm the difference is intended
      select entity_id, count(*) as podcast_likes, 'loggedin' as type
      from `gaana_favourite.tm_user_episode_*`
      where cast(entity_status as int64) > 1
        and date(timestamp(created_on)) between start_date_1 and end_date_1
      group by 1
    )
    union all
    (
      select entity_id, count(*) as podcast_likes, 'non-loggedin' as type
      from `gaana_favourite.tm_device_episode_*`
      where entity_status = '2'
        and sync_status = '0'
        and date(timestamp(created_on)) between start_date_1 and end_date_1
      group by 1
    )
  ) track_likes
  join (
    select distinct show_id, safe_cast(track_id as int64) as tid, season
    from `music.tm_podcast_seasons`
  ) maps
    on cast(track_likes.entity_id as int64) = maps.tid
  group by 1, 2
) AA
  on X.show_id = AA.show_id
 and X.season = AA.season

List of Popular Podcasts on App query

-- BigQuery Legacy SQL (square-bracket table references, FLATTEN).
-- Expands the comma-separated `podcasts` list of every active row in
-- tm_gpd_lang_summary into one row per podcast id, then attaches the
-- language name and the show's title/categories.
SELECT
  p.*,
  rr.*,
  t.name
FROM
  FLATTEN((
    SELECT
      INTEGER(SPLIT(podcasts, ',')) AS podcast_id,
      INTEGER(lang_id) AS lang_id
    FROM [gaana-bigquery-1315:music.tm_gpd_lang_summary]
    WHERE is_active = '1'
  ), podcast_id) p
LEFT JOIN
  [music.tm_language] t
  ON t.id = p.lang_id
LEFT JOIN (
  SELECT
    id,
    title,
    categories
  FROM [music.tm_podcasts_show]
) rr
  ON p.podcast_id = rr.id

Trending Section performance by day

-- Daily performance of the trending ('popularpodcasts') section per show and
-- season, next to the same show/season's totals from all sections.
declare start_date_1, end_date_1 date;

set (start_date_1, end_date_1) = (date '2021-04-20', date '2021-04-23');

select
  aa.*,
  bb.users,
  bb.duration,
  bb.plays
from (
  -- plays that came through the trending section
  select
    date,
    show_id,
    season,
    b.title,
    b.language,
    count(distinct device_id) as trending_users,
    sum(overall_listening_time) as trending_duration,
    sum(overall_plays) as trending_plays
  from (
    select
      date(timestamp(time_played_on)) as date,
      device_id,
      track_id,
      show_id,
      season,
      sum(duration) as overall_listening_time,
      count(1) as overall_plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    join (
      select distinct show_id, safe_cast(track_id as int64) as tid, season
      from `music.tm_podcast_seasons`
    ) q
      on p.track_id = q.tid
    where section_id in (select id
                         from `music_analytics.tm_sections`
                         where lower(name) like 'popularpodcasts')
      and _table_suffix between format_date('%Y%m%d', date(start_date_1))
                            and format_date('%Y%m%d', date(end_date_1))
    group by 1, 2, 3, 4, 5
  ) a
  join (
    select id, title, trim(lower(language)) as language, categories
    from `music.tm_podcasts_show`
  ) b
    on safe_cast(a.show_id as int64) = b.id
  group by 1, 2, 3, 4, 5
) aa
left join (
  -- plays from every section; the join to tm_podcasts_show only restricts
  -- the rows to shows present in that table, matching the aa side
  select
    date,
    show_id,
    season,
    count(distinct device_id) as users,
    sum(overall_listening_time) as duration,
    sum(overall_plays) as plays
  from (
    select
      date(timestamp(time_played_on)) as date,
      device_id,
      track_id,
      show_id,
      season,
      sum(duration) as overall_listening_time,
      count(1) as overall_plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    join (
      select distinct show_id, safe_cast(track_id as int64) as tid, season
      from `music.tm_podcast_seasons`
    ) q
      on p.track_id = q.tid
    where _table_suffix between format_date('%Y%m%d', date(start_date_1))
                            and format_date('%Y%m%d', date(end_date_1))
    group by 1, 2, 3, 4, 5
  ) a
  join (
    select id, title, trim(lower(language)) as language, categories
    from `music.tm_podcasts_show`
  ) b
    on safe_cast(a.show_id as int64) = b.id
  group by 1, 2, 3
) bb
  on aa.date = bb.date
 and aa.show_id = bb.show_id
 and aa.season = bb.season
-- only this final ORDER BY determines the row order of the result
order by 1, 2, 3 desc

All Podcast List with details


-- One row per show and season whose first episode is more than 90 days old:
-- first and last episode dates, the gap between them in days, and the
-- number of episodes. Shows with no title in tm_podcasts_show are dropped.
select
  show_id,
  title,
  language,
  categories,
  season,
  season_start,
  last_episode_release,
  date_diff(last_episode_release, season_start, day) as delta,
  count(tid) as num_episodes
from (
  select distinct
    show_id,
    tid,
    title,
    language,
    categories,
    season,
    min(release_date) over (partition by show_id, season) as season_start,
    max(release_date) over (partition by show_id, season) as last_episode_release
  from (
    select distinct
      show_id,
      safe_cast(track_id as int64) as tid,
      season,
      date(timestamp(created_on)) as release_date
    from `music.tm_podcast_seasons`
  ) j
  left join (
    select id, title, trim(lower(language)) as language, categories
    from `music.tm_podcasts_show`
  ) k
    on safe_cast(j.show_id as int64) = k.id
)
where date_diff(CURRENT_DATE, season_start, day) > 90
  and title is not null
group by 1, 2, 3, 4, 5, 6, 7, 8
order by 1, 5 desc

Podcast behaviour after n months of release (WIP)

-- Podcast consumption bucketed by how long after the season start each play
-- happened (30-day buckets '1'..'5', everything later in '5+').
-- season_start_1 is declared and set but only referenced by the
-- commented-out filter at the end.
DECLARE start_date_1, end_date_1, season_start_1 DATE;

SET (start_date_1, end_date_1, season_start_1) = ('2019-05-01', '2021-04-25', '2019-05-28');

SELECT
  show_id,
  show_title,
  season,
  categories,
  language,
  show_length,
  show_episode_count,
  season_start,
  -- branches are tested top to bottom, so each one only sees ages above the
  -- previous bound
  CASE
    WHEN DATE_DIFF(date, season_start, DAY) <= 30 THEN '1'
    WHEN DATE_DIFF(date, season_start, DAY) <= 60 THEN '2'
    WHEN DATE_DIFF(date, season_start, DAY) <= 90 THEN '3'
    WHEN DATE_DIFF(date, season_start, DAY) <= 120 THEN '4'
    WHEN DATE_DIFF(date, season_start, DAY) <= 150 THEN '5'
    ELSE '5+'
  END AS months_after_release,
  COUNT(DISTINCT device_id) AS total_users,
  SUM(overall_plays) AS overall_plays,
  SUM(overall_listening_time) AS overall_listening_time
FROM (
  -- plays and listening time per day, device and episode
  SELECT
    DATE(TIMESTAMP(time_played_on)) AS date,
    device_id,
    track_id,
    SUM(duration) AS overall_listening_time,
    COUNT(1) AS overall_plays
  FROM `music_analytics_archived.ma_track_play_log_detailed_*` p
  WHERE (_table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE(start_date_1))
                           AND FORMAT_DATE('%Y%m%d', DATE(end_date_1)))
    AND track_id IN (SELECT id FROM `music.tm_track` WHERE LOWER(sapid) IN ('podcast'))
  GROUP BY 1, 2, 3
) a
JOIN (
  -- episode to show/season mapping with per-season totals
  SELECT
    j.tid AS episode_id,
    j.show_id AS show_id,
    k.title AS show_title,
    season,
    language,
    categories,
    season_start,
    COUNT(j.tid) OVER (PARTITION BY show_id, season) AS show_episode_count,
    SUM(i.track_length) OVER (PARTITION BY show_id, season) AS show_length
  FROM (
    SELECT id, title, SAFE_CAST(duration AS INT64) AS track_length, popall, pop30days, pop7days
    FROM `music.tm_track`
    WHERE LOWER(sapid) IN ('podcast')
  ) i
  JOIN (
    SELECT DISTINCT
      show_id,
      tid,
      season,
      MIN(release_date) OVER (PARTITION BY show_id, season ORDER BY release_date) AS season_start
    FROM (
      SELECT DISTINCT
        show_id,
        SAFE_CAST(track_id AS INT64) AS tid,
        season,
        DATE(TIMESTAMP(created_on)) AS release_date
      FROM `music.tm_podcast_seasons`
    )
  ) j
    ON i.id = j.tid
  LEFT JOIN (
    SELECT id, title, TRIM(LOWER(language)) AS language, categories
    FROM `music.tm_podcasts_show`
  ) k
    ON SAFE_CAST(j.show_id AS INT64) = k.id
) b
  ON a.track_id = b.episode_id
WHERE b.show_length <> 0 --and b.season_start = season_start_1
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9

SELECTING TOP USERS FOR SECTIONS

-- Top 200 logged-in users per section and month, ranked by hours of podcast
-- listening, with their profile details from tm_users.
with table1 as (
  select
    month,
    section_id,
    user_id,
    email,
    dob,
    sex,
    fullname,
    -- ties on the rounded hours are ordered arbitrarily by row_number()
    row_number() over (
      partition by section_id, month
      order by round((sum(duration) / 3600), 2) desc
    ) as rank1,
    sum(plays) as total_plays,
    round((sum(duration) / 3600), 2) as total_duration_Hrs
  from (
    select
      format_date('%Y%m', cast(timestamp(time_played_on) as date)) as month,
      user_id,
      section_id,
      sum(cast(duration as float64)) as duration,
      count(1) as plays
    from `music_analytics_archived.ma_track_play_log_detailed_*` p
    -- select date range as required
    where (_table_suffix between format_date('%Y%m%d', date('2021-08-01'))
                             and format_date('%Y%m%d', date('2021-09-30')))
      and section_id in (4992, 21006, 21014, 1020, 18063, 13845)
      and user_id not in (0)
      -- optional: uncomment to ignore short plays
      -- NOTE(review): the original note said "duration > 30s" but the threshold is 15; confirm
      --and cast(duration as float64) > 15
      -- podcast tracks only
      and track_id in (select id from `music.tm_track` where lower(sapid) in ('podcast'))
    group by 1, 2, 3
  ) a
  join (
    select id, fullname, email, sex, dob
    from music.tm_users
  ) b
    on safe_cast(a.user_id as int64) = b.id
  group by 1, 2, 3, 4, 5, 6, 7
)
select *
from table1
where rank1 <= 200

SECTION WISE USER CONSUMPTION - YOUR FOLLOWED SHOWS

-- Daily podcast consumption on pages 2933 and 3031 by users who played
-- anything through section 39312 during 2021-10-01 .. 2021-10-24.
select
  date1,
  -- NOTE(review): the result has one row per date1 and the window is also
  -- partitioned by date1, so rank1 is always 1; kept so the output columns
  -- stay unchanged
  row_number() over (
    partition by date1
    order by round((sum(duration) / 3600), 2) desc
  ) as rank1,
  sum(plays) as total_plays,
  count(distinct user_id) as users,
  round((sum(duration) / 3600), 2) as total_duration_Hrs
from (
  select
    safe_cast(timestamp(time_played_on) as date) as date1,
    user_id,
    section_id,
    sum(cast(duration as float64)) as duration,
    count(1) as plays
  from `music_analytics_archived.ma_track_play_log_detailed_*` p
  -- select date range as required
  where (_table_suffix between format_date('%Y%m%d', date('2021-10-01'))
                           and format_date('%Y%m%d', date('2021-10-24')))
    -- users with at least one play from section 39312 in the same window
    and user_id in (
      select distinct user_id
      from `music_analytics_archived.ma_track_play_log_detailed_*` p
      where (_table_suffix between format_date('%Y%m%d', date('2021-10-01'))
                               and format_date('%Y%m%d', date('2021-10-24')))
        and section_id in (39312)
    )
    -- optional: uncomment to ignore short plays
    -- NOTE(review): the original note said "duration > 30s" but the threshold is 15; confirm
    --and cast(duration as float64) > 15
    -- podcast tracks only
    and track_id in (select id from `music.tm_track` where lower(sapid) in ('podcast'))
    and page_id in (2933, 3031)
  group by 1, 2, 3
)
group by 1
order by date1 asc

You might also like