import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
# Read the two worksheets to reconcile from the input workbook
file_path = 'Matching Code.xlsx'
sheet1, sheet2 = (
    pd.read_excel(file_path, sheet_name=worksheet)
    for worksheet in ('Sheet1', 'Sheet2')
)
# Function to perform fuzzy matching of Invoice Numbers
def fuzzy_match_invoice(invoice_number, invoice_numbers_list, threshold=10):
    """Pick the candidate invoice number that most resembles ``invoice_number``.

    Candidates are scored with ``fuzz.ratio`` and only the single best-scoring
    candidate is considered.

    Args:
        invoice_number: The invoice number to look up.
        invoice_numbers_list: Candidate invoice numbers to compare against.
        threshold: Minimum score the best candidate must reach to be accepted.

    Returns:
        ``(best_candidate, score)`` when the best candidate scores at least
        ``threshold``; otherwise ``(None, None)``.
    """
    best = process.extractOne(invoice_number, invoice_numbers_list, scorer=fuzz.ratio)
    # Guard clause: nothing to compare against, or the best score is too low
    if not best or best[1] < threshold:
        return None, None
    candidate, score = best[0], best[1]
    return candidate, score
# Cast the invoice numbers of both sheets to text so that comparison and fuzzy
# scoring always receive strings (this avoids the TypeError seen otherwise)
for sheet in (sheet1, sheet2):
    sheet['Invoice Number'] = sheet['Invoice Number'].astype(str)

# Index labels of the Sheet2 rows that have already been paired with a Sheet1 row
matched_rows_sheet2 = set()
# Accumulates one list of cell values per row of the reconciliation sheet
reconciliation_data = []
# Reconcile Sheet1 against Sheet2 in two passes, keeping the output in Sheet1 order.
#
# Pass 1 records every exact (GSTIN + Invoice Number) match and marks the Sheet2
# rows it uses. Pass 2 fuzzy-matches only the Sheet1 rows that had no exact match,
# and only against Sheet2 rows that are still unmarked. Running all exact matches
# first prevents a Sheet2 row from being taken by a fuzzy match and then being
# exact-matched a second time by a later Sheet1 row.
#
# Every output row carries 11 values (the last one is the match score) so that
# all rows line up with the 11 reconciliation columns.
EXACT_MATCH_SCORE = 100  # fuzz.ratio yields 100 for identical strings

rows_per_sheet1_row = []  # one list of output rows per Sheet1 row, in Sheet1 order
awaiting_fuzzy = []       # (output list, Sheet1 row) pairs that had no exact match

# Pass 1: exact matches on GSTIN and Invoice Number
for _, row1 in sheet1.iterrows():
    gstin1 = row1['GSTIN']
    invoice1 = row1['Invoice Number']
    exact_rows = sheet2[(sheet2['GSTIN'] == gstin1) & (sheet2['Invoice Number'] == invoice1)]

    output_rows = []
    # One output row per exactly matching Sheet2 row
    for sheet2_label, match_row in exact_rows.iterrows():
        output_rows.append([
            gstin1, invoice1, 'Exact', match_row['Invoice Number'],
            row1['IGST Amount'], row1['CGST Amount'], row1['SGST Amount'],
            match_row['IGST Amount'], match_row['CGST Amount'], match_row['SGST Amount'],
            EXACT_MATCH_SCORE,
        ])
        matched_rows_sheet2.add(sheet2_label)  # Mark this Sheet2 row as matched

    if not output_rows:
        # Keep a reference to the (still empty) list so pass 2 fills it in place
        awaiting_fuzzy.append((output_rows, row1))
    rows_per_sheet1_row.append(output_rows)

# Pass 2: fuzzy matches for the rows that had no exact match
for output_rows, row1 in awaiting_fuzzy:
    gstin1 = row1['GSTIN']
    invoice1 = row1['Invoice Number']
    igst1 = row1['IGST Amount']
    cgst1 = row1['CGST Amount']
    sgst1 = row1['SGST Amount']

    # Candidates: same GSTIN and not yet paired with any Sheet1 row
    candidates = sheet2[(sheet2['GSTIN'] == gstin1) & ~sheet2.index.isin(matched_rows_sheet2)]

    fuzzy_invoice, score = None, None
    if not candidates.empty:
        fuzzy_invoice, score = fuzzy_match_invoice(
            invoice1, candidates['Invoice Number'].tolist()
        )

    if fuzzy_invoice is not None:
        # Several candidates may share the matched number; take the first one
        fuzzy_match_row = candidates[candidates['Invoice Number'] == fuzzy_invoice].iloc[0]
        output_rows.append([
            gstin1, invoice1, 'Fuzzy', fuzzy_invoice,
            igst1, cgst1, sgst1, fuzzy_match_row['IGST Amount'],
            fuzzy_match_row['CGST Amount'], fuzzy_match_row['SGST Amount'],
            score,
        ])
        matched_rows_sheet2.add(fuzzy_match_row.name)  # Mark this Sheet2 row as matched
    else:
        # No candidates left, or none scored above the threshold: still report
        # the Sheet1 row so that it is not silently dropped from the output
        output_rows.append([
            gstin1, invoice1, 'No Match', '',
            igst1, cgst1, sgst1, '', '', '', 0,
        ])

# Flatten the per-row groups into the reconciliation rows, in Sheet1 order
for output_rows in rows_per_sheet1_row:
    reconciliation_data.extend(output_rows)
# Assemble the reconciliation table and write it out as a single-sheet workbook
output_columns = [
    'GSTIN', 'Invoice Number', 'Match Type', 'Matched Invoice Number',
    'IGST Amount (Sheet1)', 'CGST Amount (Sheet1)', 'SGST Amount (Sheet1)',
    'IGST Amount (Sheet2)', 'CGST Amount (Sheet2)', 'SGST Amount (Sheet2)',
    'Fuzzy Match Score',
]
reconciliation_df = pd.DataFrame(data=reconciliation_data, columns=output_columns)

output_path = 'Reconciliation_Sheet.xlsx'
with pd.ExcelWriter(output_path, engine='openpyxl') as excel_writer:
    reconciliation_df.to_excel(
        excel_writer,
        sheet_name='Reconciliation Sheet',
        index=False,
    )
print("Reconciliation sheet has been generated and saved as 'Reconciliation_Sheet.xlsx'.")
<<<<MAKE SURE THE WORKBOOK IS NAMED Matching Code.xlsx>>>>
<<<<Matching Code.xlsx MUST CONTAIN TWO SHEETS NAMED “Sheet1” AND “Sheet2”>>>>
<<<<COPY THE 3B/6B/ITC AVAILED DATA INTO Sheet1 AND THE GSTR-2B/8A DATA INTO Sheet2>>>>
<<<<<EACH SHEET MUST HAVE THE HEADERS “GSTIN”, “Invoice Number”, “IGST Amount”, “CGST Amount”
AND “SGST Amount”>>>>
<<<<<BEFORE PASTING, MAKE A PIVOT OF GSTIN, INVOICE NUMBER, IGST, CGST AND SGST>>>>
<<<<<ADJUST THE FUZZY MATCHING SCORE THRESHOLD; SET IT TO 10>>>>
<<<<<IN SOME INSTANCES THE CODE MATCHES THE SAME INVOICE IN SHEET2 TWICE AGAINST
SHEET1 (ONCE EXACT, ONCE FUZZY). TO EXTRACT THE REMARKS IN THE WORKING PROPERLY,
HIGHLIGHT THE DUPLICATE KEYS IN THE RECONCILIATION SHEET; THOSE ARE THE DIFFERENCE
POINTS>>>>