Find the starting indices of the substrings in string (S) which is made by concatenating all words from a list(L)

Last Updated : 01 Mar, 2023

You are given a string S, and a list of words L i.e array/vector of strings (Words in list L are all of the same length). Find the starting indices of the substrings in string S, which contains all the words present in list L. The order of words of list L appearing inside string S does not matter i.e if string S is "barfooapplefoobar" and list of words (L) is ["foo", "bar"] then we have to look for substrings "foobar", "barfoo" in string S. Note : Words inside the list L can repeat.

Examples :

Input : S: "barfoothefoobarman" 
        L: ["foo", "bar"]                     
Output : 0 9
Explanation : 
// at index 0 : barfoo
// at index 9 : foobar 

Input : S: "catbatatecatatebat"
        L: ["cat", "ate", "bat"] 
Output : 0 3 9
Explanation : 
// at index 0 : catbatate
// at index 3 : batatecat
// at index 9 : catatebat    

Input : S : "abcdababcd"
        L : ["ab", "ab", "cd"]
Output : 0 2 4 
Explanation :
// at index 0 : abcdab
// at index 2 : cdabab
// at index 4 : ababcd

Input : S : "abcdababcd"
        L : ["ab", "ab"]
Output : 4

Approach : We can use Hashing Technique to solve the above problem.

Let's see the steps :

Declare a map (hash_map) which stores all words of List L corresponding to their occurrences inside list L.
Traverse through all possible substrings in string S which are equal to size_L(total number of characters produced if all the words in list L are concatenated).
Create a temporary map (temp_hash_map) and initialize it with original map(hash_map) for every possible substring.
Extract the words from the substring and if the word is present in temp_hash_map we decrease it's corresponding count, if it's not present in temp_hash_map we simply break.
After traversing the substring we traverse temp_hash_map and look for any key which has it's count > 0. If we found no such key it means that all the words in list L were found in substring and store the given starting index of the substring, if we find a key which has it's count > 0 it means we did not traversed whole substring because we came across a word which was not in temp_hash_map.

Below is the implementation of above approach :

C++

// CPP program to calculate the starting indices
// of substrings inside S which contains all the
// words present in List L.
#include <bits/stdc++.h>
using namespace std;

// Returns an integer vector consisting of starting
// indices of substrings present inside the string S
vector<int> findSubstringIndices(string S, 
                            const vector<string>& L)
{

    // Number of a characters of a word in list L.
    int size_word = L[0].size();

    // Number of words present inside list L.
    int word_count = L.size();

    // Total characters present in list L.
    int size_L = size_word * word_count;

    // Resultant vector which stores indices.
    vector<int> res;

    // If the total number of characters in list L
    // is more than length of string S itself.
    if (size_L > S.size())
        return res;

    // Map stores the words present in list L
    // against it's occurrences inside list L
    unordered_map<string, int> hash_map;

    for (int i = 0; i < word_count; i++) 
        hash_map[L[i]]++;    

    for (int i = 0; i <= S.size() - size_L; i++) {
        unordered_map<string, int> temp_hash_map(hash_map);

        int j = i,count=word_count;

        // Traverse the substring
        while (j < i + size_L) {

            // Extract the word
            string word = S.substr(j, size_word);


            // If word not found or if frequency of current word is more than required simply break.
            if (hash_map.find(word) == hash_map.end()||temp_hash_map[word]==0)
                break;

            // Else decrement the count of word from hash_map
            else
               { temp_hash_map[word]--;count--;} 


            j += size_word;
        }
     
        // Store the starting index of that substring when all the words in the list are in substring
        if (count == 0)
            res.push_back(i);
    }

    return res;
}

// Driver Code
int main()
{
    string S = "barfoothefoobarman";
    vector<string> L = { "foo", "bar" };
    vector<int> indices = findSubstringIndices(S, L);
    for (int i = 0; i < indices.size(); i++)
        cout << indices[i] << " ";
    return 0;
}

Java

// Java program to calculate the starting indices
// of substrings inside S which contains all the
// words present in List L.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

class GFG 
{
    public static ArrayList<Integer> 
    findSubstring(String A, final List<String> B) 
    {

        // Number of a characters of a word in list L.
        int size_word = B.get(0).length();
        
        // Number of words present inside list L. 
        int word_count = B.size();

        // Total characters present in list L.
        int size_l = size_word * word_count;

        // Resultant vector which stores indices.
        ArrayList<Integer> res = new ArrayList<Integer>();
        int n = A.length();
        
        // If the total number of characters in list L 
        // is more than length of string S itself.
        if (size_l > n) 
        {
            return res;
        }

        // Map stores the words present in list L 
        // against it's occurrences inside list L 
        HashMap<String, Integer> hashMap = 
            new HashMap<String, Integer>();

        for (String word : B) 
        {
            hashMap.put(word, hashMap.getOrDefault(word, 0) + 1);
        }

        
        for (int i = 0; i <= n - size_l; i++) 
        {
            HashMap<String, Integer> tempMap = 
            (HashMap<String, Integer>) hashMap.clone();
            int j = i, count = word_count;
            
            // Traverse the substring 
            while (j < i + size_l) 
            {
                // Extract the word
                String word = A.substring(j, j + size_word);
            
                // If word not found or if frequency 
                // of current word is more than required simply break. 
                if (!hashMap.containsKey(word) || tempMap.get(word) == 0) 
                {
                    break;
                } 
                
                // Else decrement the count of word from hash_map
                else 
                {
                    tempMap.put(word, tempMap.get(word) - 1);
                    count--;
                }
                j += size_word;
            }
            
            // Store the starting index of that
            // substring when all the words in 
            // the list are in substring 
            if (count == 0)
            {
                res.add(i);
            }

        }
        return res;
    }

    // Driver code
    public static void main(String[] args) 
    {
        String S = "barfoothefoobarman";
        ArrayList<String> L = 
        new ArrayList<>(Arrays.asList("foo", "bar"));
        ArrayList<Integer> indices = findSubstring(S, L);
        for (Integer i : indices)
        {
            System.out.println(i);
        }
    }
}

// This code is contributed by Manish Sakariya

Python3

# Python3 program to calculate the starting indices
# of substrings inside S which contains all the
# words present in List L.

# Returns an integer vector consisting of starting
# indices of substrings present inside the string S
def findSubStringIndices(s, L):

    # Number of a characters of a word in list L.
    size_word = len(L[0])

    # Number of words present inside list L.
    word_count = len(L)

    # Total characters present in list L.
    size_L = size_word * word_count

    # Resultant vector which stores indices.
    res = []

    # If the total number of characters in list L
    # is more than length of string S itself.
    if size_L > len(s):
        return res

    # Map stores the words present in list L
    # against it's occurrences inside list L
    hash_map = dict()

    for i in range(word_count):
        if L[i] in hash_map:
            hash_map[L[i]] += 1
        else:
            hash_map[L[i]] = 1

    for i in range(0, len(s) - size_L + 1, 1):
        temp_hash_map = hash_map.copy()
        j = i
        count = word_count

        # Traverse the substring
        while j < i + size_L:

            # Extract the word
            word = s[j:j + size_word]

            # If word not found or if frequency of 
            # current word is more than required simply break.
            if (word not in hash_map or 
                temp_hash_map[word] == 0):
                break

            # Else decrement the count of word
            # from hash_map
            else:
                temp_hash_map[word] -= 1
                count -= 1
            j += size_word

        # Store the starting index of that substring
        # when all the words in the list are in substring
        if count == 0:
            res.append(i)
    return res

# Driver Code
if __name__ == "__main__":
    s = "barfoothefoobarman"
    L = ["foo", "bar"]
    indices = findSubStringIndices(s, L)
    
    print(*indices)

# This code is contributed by
# sanjeev2552

JavaScript

<script>

// JavaScript code to implement the above approach

// Returns an integer vector consisting of starting
// indices of substrings present inside the string S
function findSubStringIndices(s, L){

    // Number of a characters of a word in list L.
    let size_word = L[0].length

    // Number of words present inside list L.
    let word_count = L.length

    // Total characters present in list L.
    let size_L = size_word * word_count

    // Resultant vector which stores indices.
    let res = []

    // If the total number of characters in list L
    // is more than length of string S itself.
    if(size_L > s.length)
        return res

    // Map stores the words present in list L
    // against it's occurrences inside list L
    let hash_map = new Map()

    for(let i=0;i<word_count;i++){
        if(hash_map.has(L[i]))
            hash_map.set(L[i],hash_map.get(L[i]) + 1)
        else
            hash_map.set(L[i], 1)
    }

    for(let i=0;i< s.length - size_L + 1;i++){
        let temp_hash_map = new Map(hash_map)
        let j = i
        let count = word_count

        // Traverse the substring
        while(j < i + size_L){

            // Extract the word
            let word = s.substring(j,j+size_word)

            // If word not found or if frequency of
            // current word is more than required simply break.
            if (hash_map.has(word) == false || temp_hash_map.get(word) == 0)
                break

            // Else decrement the count of word
            // from hash_map
            else{
                temp_hash_map.set(word ,temp_hash_map.get(word)- 1)
                count -= 1
            }
            j += size_word
        }

        // Store the starting index of that substring
        // when all the words in the list are in substring
        if(count == 0)
            res.push(i)
    }
    return res
}

// Driver Code

let s = "barfoothefoobarman"
let L = ["foo", "bar"]
let indices = findSubStringIndices(s, L)

document.write(indices)

// This code is contributed by Shinjanpatra

</script>

// C# Program for the above approach

using System;
using System.Collections.Generic;

class Program
{
    // Returns an integer list consisting of starting
    // indices of substrings present inside the string S
    static List<int> FindSubstringIndices(string S, List<string> L)
    {
        // Number of a characters of a word in list L.
        int size_word = L[0].Length;

        // Number of words present inside list L.
        int word_count = L.Count;

        // Total characters present in list L.
        int size_L = size_word * word_count;

        // Resultant list which stores indices.
        List<int> res = new List<int>();

        // If the total number of characters in list L
        // is more than length of string S itself.
        if (size_L > S.Length)
            return res;

        // Map stores the words present in list L
        // against it's occurrences inside list L
        Dictionary<string, int> hash_map = new Dictionary<string, int>();

        foreach (string word in L)
            hash_map[word] = hash_map.GetValueOrDefault(word, 0) + 1;

        for (int i = 0; i <= S.Length - size_L; i++)
        {
            Dictionary<string, int> temp_hash_map = new Dictionary<string, int>(hash_map);

            int j = i, count = word_count;

            // Traverse the substring
            while (j < i + size_L)
            {
                // Extract the word
                string word = S.Substring(j, size_word);

                // If word not found or if frequency of current word is more than required simply break.
                if (!hash_map.ContainsKey(word) || temp_hash_map[word] == 0)
                    break;

                // Else decrement the count of word from hash_map
                else
                {
                    temp_hash_map[word]--;
                    count--;
                }

                j += size_word;
            }

            // Store the starting index of that substring when all the words in the list are in substring
            if (count == 0)
                res.Add(i);
        }

        return res;
    }

    // Driver Code
    static void Main(string[] args)
    {
        string S = "barfoothefoobarman";
        List<string> L = new List<string> { "foo", "bar" };
        List<int> indices = FindSubstringIndices(S, L);
        foreach (int index in indices)
            Console.Write(index + " ");
    }
}

// This code is contributed by Prince

Output

0 9

Time Complexity : O(N - K) * K N : length of string S. K : total length of list L if all the words are concatenated. If L : ["ab", "cd"] then K = 4.

Space Complexity: O(K), where K : total length of list L