Frequency of a Substring in a String
Last Updated :
18 Apr, 2023
Given an input string and a pattern, the task is to find the frequency of occurrences of the string pattern in a given string.
Examples:
Input: pattern = "man", string = "dhimanman"
Output: 2
Input: pattern = "nn", string = "Banana"
Output: 0
Input: pattern = "aa", string = "aaaaa"
Output : 4
Approach:
A simple solution is to match characters one by one. And whenever we see a complete match, increment count. For this, we can use Naive pattern searching.
Below is the implementation of the above approach.
C++
// Simple C++ program to count occurrences
// of pat in txt.
#include <bits/stdc++.h>
using namespace std;
int countFreq(string& pat, string& txt)
{
int M = pat.length();
int N = txt.length();
int res = 0;
/* A loop to slide pat[] one by one */
for (int i = 0; i <= N - M; i++) {
/* For current index i, check for
pattern match */
int j;
for (j = 0; j < M; j++)
if (txt[i + j] != pat[j])
break;
// if pat[0...M-1] = txt[i, i+1, ...i+M-1]
if (j == M) {
res++;
}
}
return res;
}
/* Driver program to test above function */
int main()
{
string txt = "dhimanman";
string pat = "man";
cout << countFreq(pat, txt);
return 0;
}
Java
// Simple Java program to count occurrences
// of pat in txt.
class GFG {
static int countFreq(String pat, String txt)
{
int M = pat.length();
int N = txt.length();
int res = 0;
/* A loop to slide pat[] one by one */
for (int i = 0; i <= N - M; i++) {
/* For current index i, check for
pattern match */
int j;
for (j = 0; j < M; j++) {
if (txt.charAt(i + j) != pat.charAt(j)) {
break;
}
}
// if pat[0...M-1] = txt[i, i+1, ...i+M-1]
if (j == M) {
res++;
j = 0;
}
}
return res;
}
/* Driver program to test above function */
static public void main(String[] args)
{
String txt = "dhimanman";
String pat = "man";
System.out.println(countFreq(pat, txt));
}
}
// This code is contributed by 29AjayKumar
Python3
# Simple python program to count
# occurrences of pat in txt.
def countFreq(pat, txt):
M = len(pat)
N = len(txt)
res = 0
# A loop to slide pat[] one by one
for i in range(N - M + 1):
# For current index i, check
# for pattern match
j = 0
while j < M:
if (txt[i + j] != pat[j]):
break
j += 1
if (j == M):
res += 1
j = 0
return res
# Driver Code
if __name__ == '__main__':
txt = "dhimanman"
pat = "man"
print(countFreq(pat, txt))
# This code is contributed
# by PrinciRaj1992
C#
// Simple C# program to count occurrences
// of pat in txt.
using System;
public class GFG {
static int countFreq(String pat, String txt)
{
int M = pat.Length;
int N = txt.Length;
int res = 0;
/* A loop to slide pat[] one by one */
for (int i = 0; i <= N - M; i++) {
/* For current index i, check for
pattern match */
int j;
for (j = 0; j < M; j++) {
if (txt[i + j] != pat[j]) {
break;
}
}
// if pat[0...M-1] = txt[i, i+1, ...i+M-1]
if (j == M) {
res++;
j = 0;
}
}
return res;
}
/* Driver program to test above function */
static public void Main()
{
String txt = "dhimanman";
String pat = "man";
Console.Write(countFreq(pat, txt));
}
}
// This code is contributed by 29AjayKumar
PHP
<?php
// Simple PHP program to count occurrences
// of pat in txt.
function countFreq($pat, $txt)
{
$M = strlen($pat);
$N = strlen($txt);
$res = 0;
/* A loop to slide pat[] one by one */
for ($i = 0; $i <= $N - $M; $i++)
{
/* For current index i, check for
pattern match */
for ($j = 0; $j < $M; $j++)
if ($txt[$i+$j] != $pat[$j])
break;
// if pat[0...M-1] = txt[i, i+1, ...i+M-1]
if ($j == $M)
{
$res++;
$j = 0;
}
}
return $res;
}
// Driver Code
$txt = "dhimanman";
$pat = "man";
echo countFreq($pat, $txt);
// This code is contributed
// by Akanksha Rai
JavaScript
<script>
// JavaScript program to count occurrences
// of pat in txt.
let mod = 100000007;
function countFreq(pat, txt)
{
let M = pat.length;
let N = txt.length;
let res = 0;
// A loop to slide pat[] one by one
for(let i = 0; i <= N - M; i++)
{
// For current index i, check for
// pattern match
let j;
for(j = 0; j < M; j++)
{
if (txt[i + j] != pat[j])
{
break;
}
}
// If pat[0...M-1] = txt[i, i+1, ...i+M-1]
if (j == M)
{
res++;
j = 0;
}
}
return res;
}
// Driver Code
let txt = "dhimanman";
let pat = "man";
document.write(countFreq(pat, txt));
// This code is contributed by code_hunt
</script>
Time Complexity: O(M * N)
Auxiliary Space: O(1)
Efficient Approach:
An efficient solution is to use KMP algorithm.
Below is the implementation of the above approach.
C++
// C++ program to count occurrences
// of pattern in a text.
#include <iostream>
using namespace std;
void computeLPSArray(string pat, int M, int lps[])
{
// Length of the previous longest
// prefix suffix
int len = 0;
int i = 1;
lps[0] = 0; // lps[0] is always 0
// The loop calculates lps[i] for
// i = 1 to M-1
while (i < M) {
if (pat[i] == pat[len]) {
len++;
lps[i] = len;
i++;
}
else // (pat[i] != pat[len])
{
// This is tricky. Consider the example.
// AAACAAAA and i = 7. The idea is similar
// to search step.
if (len != 0) {
len = lps[len - 1];
// Also, note that we do not
// increment i here
}
else // if (len == 0)
{
lps[i] = len;
i++;
}
}
}
}
int KMPSearch(string pat, string txt)
{
int M = pat.length();
int N = txt.length();
// Create lps[] that will hold the longest
// prefix suffix values for pattern
int lps[M];
int j = 0; // index for pat[]
// Preprocess the pattern (calculate lps[]
// array)
computeLPSArray(pat, M, lps);
int i = 0; // index for txt[]
int res = 0;
int next_i = 0;
while (i < N) {
if (pat[j] == txt[i]) {
j++;
i++;
}
if (j == M) {
// When we find pattern first time,
// we iterate again to check if there
// exists more pattern
j = lps[j - 1];
res++;
}
// Mismatch after j matches
else if (i < N && pat[j] != txt[i]) {
// Do not match lps[0..lps[j-1]]
// characters, they will match anyway
if (j != 0)
j = lps[j - 1];
else
i = i + 1;
}
}
return res;
}
// Driver code
int main()
{
string txt = "geeksforgeeks";
string pat = "eeks";
int ans = KMPSearch(pat, txt);
cout << ans;
return 0;
}
// This code is contributed by akhilsaini
Java
// Java program to count occurrences of pattern
// in a text.
class KMP_String_Matching {
int KMPSearch(String pat, String txt)
{
int M = pat.length();
int N = txt.length();
// create lps[] that will hold the longest
// prefix suffix values for pattern
int lps[] = new int[M];
int j = 0; // index for pat[]
// Preprocess the pattern (calculate lps[]
// array)
computeLPSArray(pat, M, lps);
int i = 0; // index for txt[]
int res = 0;
int next_i = 0;
while (i < N) {
if (pat.charAt(j) == txt.charAt(i)) {
j++;
i++;
}
if (j == M) {
// When we find pattern first time,
// we iterate again to check if there
// exists more pattern
j = lps[j - 1];
res++;
// We start i to check for more than once
// appearance of pattern, we will reset i
// to previous start+1
if (lps[j] != 0)
i = ++next_i;
j = 0;
}
// mismatch after j matches
else if (i < N
&& pat.charAt(j) != txt.charAt(i)) {
// Do not match lps[0..lps[j-1]] characters,
// they will match anyway
if (j != 0)
j = lps[j - 1];
else
i = i + 1;
}
}
return res;
}
void computeLPSArray(String pat, int M, int lps[])
{
// length of the previous longest prefix suffix
int len = 0;
int i = 1;
lps[0] = 0; // lps[0] is always 0
// the loop calculates lps[i] for i = 1 to M-1
while (i < M) {
if (pat.charAt(i) == pat.charAt(len)) {
len++;
lps[i] = len;
i++;
}
else // (pat[i] != pat[len])
{
// This is tricky. Consider the example.
// AAACAAAA and i = 7. The idea is similar
// to search step.
if (len != 0) {
len = lps[len - 1];
// Also, note that we do not increment
// i here
}
else // if (len == 0)
{
lps[i] = len;
i++;
}
}
}
}
// Driver program to test above function
public static void main(String args[])
{
String txt = "geeksforgeeks";
String pat = "eeks";
int ans
= new KMP_String_Matching().KMPSearch(pat, txt);
System.out.println(ans);
}
}
Python3
# Python3 program to count occurrences of
# pattern in a text.
def KMPSearch(pat, txt):
M = len(pat)
N = len(txt)
# Create lps[] that will hold the longest
# prefix suffix values for pattern
lps = [None] * M
j = 0 # index for pat[]
# Preprocess the pattern (calculate lps[]
# array)
computeLPSArray(pat, M, lps)
i = 0 # index for txt[]
res = 0
next_i = 0
while (i < N):
if pat[j] == txt[i]:
j = j + 1
i = i + 1
if j == M:
# When we find pattern first time,
# we iterate again to check if there
# exists more pattern
j = lps[j - 1]
res = res + 1
# We start i to check for more than once
# appearance of pattern, we will reset i
# to previous start+1
if lps[j] != 0:
next_i = next_i + 1
i = next_i
j = 0
# Mismatch after j matches
elif ((i < N) and (pat[j] != txt[i])):
# Do not match lps[0..lps[j-1]]
# characters, they will match anyway
if (j != 0):
j = lps[j - 1]
else:
i = i + 1
return res
def computeLPSArray(pat, M, lps):
# Length of the previous longest
# prefix suffix
len = 0
i = 1
lps[0] = 0 # lps[0] is always 0
# The loop calculates lps[i] for
# i = 1 to M-1
while (i < M):
if pat[i] == pat[len]:
len = len + 1
lps[i] = len
i = i + 1
else: # (pat[i] != pat[len])
# This is tricky. Consider the example.
# AAACAAAA and i = 7. The idea is similar
# to search step.
if len != 0:
len = lps[len - 1]
# Also, note that we do not increment
# i here
else: # if (len == 0)
lps[i] = len
i = i + 1
# Driver code
if __name__ == "__main__":
txt = "geeksforgeeks"
pat = "eeks"
ans = KMPSearch(pat, txt)
print(ans)
# This code is contributed by akhilsaini
C#
// C# program to count occurrences of pattern
// in a text.
using System;
public class KMP_String_Matching {
int KMPSearch(String pat, String txt)
{
int M = pat.Length;
int N = txt.Length;
// create lps[] that will hold the longest
// prefix suffix values for pattern
int[] lps = new int[M];
int j = 0; // index for pat[]
// Preprocess the pattern (calculate lps[]
// array)
computeLPSArray(pat, M, lps);
int i = 0; // index for txt[]
int res = 0;
int next_i = 0;
while (i < N) {
if (pat[j] == txt[i]) {
j++;
i++;
}
if (j == M) {
// When we find pattern first time,
// we iterate again to check if there
// exists more pattern
j = lps[j - 1];
res++;
// We start i to check for more than once
// appearance of pattern, we will reset i
// to previous start+1
if (lps[j] != 0)
i = ++next_i;
j = 0;
}
// mismatch after j matches
else if (i < N && pat[j] != txt[i]) {
// Do not match lps[0..lps[j-1]] characters,
// they will match anyway
if (j != 0)
j = lps[j - 1];
else
i = i + 1;
}
}
return res;
}
void computeLPSArray(String pat, int M, int[] lps)
{
// length of the previous longest prefix suffix
int len = 0;
int i = 1;
lps[0] = 0; // lps[0] is always 0
// the loop calculates lps[i] for i = 1 to M-1
while (i < M) {
if (pat[i] == pat[len]) {
len++;
lps[i] = len;
i++;
}
else // (pat[i] != pat[len])
{
// This is tricky. Consider the example.
// AAACAAAA and i = 7. The idea is similar
// to search step.
if (len != 0) {
len = lps[len - 1];
// Also, note that we do not increment
// i here
}
else // if (len == 0)
{
lps[i] = len;
i++;
}
}
}
}
// Driver code
public static void Main(String[] args)
{
String txt = "geeksforgeeks";
String pat = "eeks";
int ans
= new KMP_String_Matching().KMPSearch(pat, txt);
Console.WriteLine(ans);
}
}
// This code is contributed by Princi Singh
JavaScript
<script>
// JavaScript program to count occurrences
// of pattern in a text.
function computeLPSArray(pat,M,lps)
{
// Length of the previous longest
// prefix suffix
let len = 0;
let i = 1;
lps[0] = 0; // lps[0] is always 0
// The loop calculates lps[i] for
// i = 1 to M-1
while (i < M)
{
if (pat[i] == pat[len])
{
len++;
lps[i] = len;
i++;
}
else // (pat[i] != pat[len])
{
// This is tricky. Consider the example.
// AAACAAAA and i = 7. The idea is similar
// to search step.
if (len != 0)
{
len = lps[len - 1];
// Also, note that we do not
// increment i here
}
else // if (len == 0)
{
lps[i] = len;
i++;
}
}
}
}
function KMPSearch(pat,txt)
{
let M = pat.length;
let N = txt.length;
// Create lps[] that will hold the longest
// prefix suffix values for pattern
let lps = new Array(M);
lps.fill(0);
let j = 0; // index for pat[]
// Preprocess the pattern (calculate lps[]
// array)
computeLPSArray(pat, M, lps);
let i = 0; // index for txt[]
let res = 0;
let next_i = 0;
while (i < N)
{
if (pat[j] == txt[i])
{
j++;
i++;
}
if (j == M)
{
// When we find pattern first time,
// we iterate again to check if there
// exists more pattern
j = lps[j - 1];
res++;
// We start i to check for more than once
// appearance of pattern, we will reset i
// to previous start+1
if (lps[j]!=0)
i = ++next_i;
j = 0;
}
// Mismatch after j matches
else if (i < N && pat[j] != txt[i])
{
// Do not match lps[0..lps[j-1]]
// characters, they will match anyway
if (j != 0)
j = lps[j - 1];
else
i = i + 1;
}
}
return res;
}
// Driver code
let txt = "geeksforgeeks";
let pat = "eeks";
let ans = KMPSearch(pat, txt);
document.write(ans);
</script>
Time Complexity: O(M + N)
Auxiliary Space: O(M) As an array of size M is used to store the longest prefix suffix values for the pattern.
Similar Reads
Frequency of a substring in a string using pthread Given an input string and a substring. Find the frequency of occurrences of a substring in the given string using pthreads. Examples: Input: string = "man" substring = "dhimanman"Output: 2Input: string = "banana" substring = "nn"Output: 0Note: It is advised to execute the program in Linux based syst
6 min read
Frequency of a string in an array of strings You are given a collection of strings and a list of queries. For every query there is a string given. We need to print the number of times the given string occurs in the collection of strings. Examples: Input : arr[] = {wer, wer, tyu, oio, tyu} q[] = {wer, tyu, uio}Output : 2 2 0Explanation : q[0] a
15 min read
Queries for frequencies of characters in substrings Given a string s and Q number of queries. Each query Q consists of l and r and a character c. Find the frequency of character c in substring l to r. Examples: Input : s = geeksforgeeks 4 0 5 e 2 6 f 4 7 m 0 12 e Output : 2 1 0 4 Substring from 0 to 5 is geeksf. Here e occurs 2 times. Input : s = app
6 min read
Find frequency of all characters across all substrings of given string Given a string S containing all lowercase characters and its length N. Find frequency of all characters across all substrings of the given string. Examples: Input: N = 3, S = "aba"Output: a 6b 4Explanation: The substrings are: a, b, a, ab, ba, aba. The frequency of each character: a = 6, b = 4. Henc
4 min read
Find frequency of each word in a string in Python Write a python code to find the frequency of each word in a given string. Examples: Input : str[] = "Apple Mango Orange Mango Guava Guava Mango" Output : frequency of Apple is : 1 frequency of Mango is : 3 frequency of Orange is : 1 frequency of Guava is : 2 Input : str = "Train Bus Bus Train Taxi A
7 min read
Maximum length substring with highest frequency in a string Given a string. The task is to find the maximum occurred substring with a maximum length. These occurrences can overlap. Examples: Input: str = "abab" Output: ab "a", "b", "ab" are occur 2 times. But, "ab" has maximum length Input: str = "abcd" Output: a Approach: The idea is to store the frequency
5 min read
Count of substrings of given string with frequency of each character at most K Given a string str, the task is to calculate the number of substrings of the given string such that the frequency of each element of the string is almost K. Examples: Input: str = "abab", K = 1Output: 7Explanation: The substrings such that the frequency of each character is atmost 1 are "a", "b", "a
6 min read
Substring of length K having maximum frequency in the given string Given a string str, the task is to find the substring of length K which occurs the maximum number of times. If more than one string occurs maximum number of times, then print the lexicographically smallest substring. Examples: Input: str = "bbbbbaaaaabbabababa", K = 5Output: ababaExplanation:The sub
14 min read
Queries to find frequencies of a string within specified substrings Given a string S and a matrix Q of queries, each specifying the starting and ending indices L( = Q[i][0]) and R( = Q[i][0]) respectively of a substring of S, the task is to find the frequency of string K in substring [L, R]. Note: The ranges follow the 1-based indexing. Examples: Input: S = "GFGFFGF
5 min read
Count substrings made up of a single distinct character Given a string S of length N, the task is to count the number of substrings made up of a single distinct character.Note: For the repetitive occurrences of the same substring, count all repetitions. Examples: Input: str = "geeksforgeeks"Output: 15Explanation: All substrings made up of a single distin
5 min read