def parseDate(stringData, dataRgex=(
        r'(\w+\s\d+\,\s\d+)',
        r'(\w+\s\d+\,\d+)',
        r'(\w+\s\d+\s\d+)',
        r'(\d+\s\w+\s\d+)',
        r'(\d+\/\d+\/\d+)',
        r'(\d+\.\d+\.\d+)',
        r'(\d+\-\w+\-\d+)',
        r"(\d+'\w+\s\d+)",
)):
    """Return the first date-like substring of ``stringData``.

    Each pattern in ``dataRgex`` is tried in order (case-insensitively); every
    match is handed to ``dparser.parse`` and the first one it accepts is
    returned unchanged.

    Args:
        stringData: Text to scan.
        dataRgex: Sequence of regex pattern strings, tried in order.

    Returns:
        The matched substring (``str``) of the first candidate that parses,
        or an empty list when no candidate parses.
    """
    for pattern in dataRgex:
        for candidate in re.findall(pattern, stringData, re.IGNORECASE):
            try:
                # The parse result is discarded: the call is only a validity
                # probe for the candidate text.
                dparser.parse(candidate)
            except (ValueError, OverflowError, TypeError):
                # NOTE(review): assumes dparser is dateutil.parser, whose
                # parse() signals rejection with ValueError/OverflowError;
                # TypeError covers tuple matches from multi-group patterns.
                continue
            return candidate
    return []
# The dataRgex parameters take a list (any sequence) of regex pattern strings.
# dataTime is an alternation of four clock-time patterns; because it has
# several groups, re.findall returns one tuple per match for it. It is not
# referenced by the functions visible in this part of the file.
dataTime = r'(\d+\:\d+\:\d+ \w+)|(\d+\:\d+\:\d+\w+)|(\d+\:\d+ \w+)|(\d+\:\w+)'


def parseDateTime(stringData, dataRgex=(
        r'(\w+\s\d+\,\s\d+)',
        r'(\w+\s\d+\,\d+)',
        r'(\w+\s\d+\s\d+)',
        r'(\d+\s\w+\s\d+)',
        r'(\d+\/\d+\/\d+)',
        r'(\d+\.\d+\.\d+)',
        r'(\d+\-\w+\-\d+)',
        r"(\d+'\w+\s\d+|\d+'\w+)",
)):
    """Collect every date/time-like substring of ``stringData``.

    Each pattern in ``dataRgex`` is applied case-insensitively and every
    captured piece of text that ``dparser.parse`` accepts is collected.

    Args:
        stringData: Text to scan.
        dataRgex: Sequence of regex pattern strings. Patterns with one group
            and patterns with several groups (such as ``dataTime``) are both
            supported.

    Returns:
        List of accepted substrings, in pattern order then match order.
    """
    getTime = []
    for pattern in dataRgex:
        for match in re.findall(pattern, stringData, re.IGNORECASE):
            # findall yields a plain string for patterns with at most one
            # group and a tuple for multi-group patterns. Normalise to a
            # tuple so a string match is never walked character by character.
            pieces = match if isinstance(match, tuple) else (match,)
            for piece in pieces:
                if not piece:
                    # Groups that did not take part in the match are ''.
                    continue
                try:
                    dparser.parse(piece)
                except (ValueError, OverflowError, TypeError):
                    # NOTE(review): assumes dparser is dateutil.parser --
                    # confirm against the file's imports.
                    continue
                getTime.append(piece)
    return getTime
def parseGSTIN(stringData, dataRgex=('(.*)',)):
    """Capture the text that follows a GSTIN-style label in ``stringData``.

    For every label (``GSTIN``, ``STIN``, ``gstin``, ``GST``) and every
    pattern in ``dataRgex`` the regex ``label + pattern`` is searched, and
    group 1 of each hit is collected. Labels overlap, so one occurrence of
    ``GSTIN`` also produces hits for ``STIN`` and ``GST``.

    Args:
        stringData: Text to scan; ``None`` or empty yields no results.
        dataRgex: Sequence of regex suffixes, each with at least one group.

    Returns:
        List of captured strings, ordered by label and then by pattern.
    """
    if not stringData:
        return []

    found = []
    for label in ('GSTIN', 'STIN', 'gstin', 'GST'):
        for suffix in dataRgex:
            try:
                match = re.search(label + suffix, stringData)
            except (re.error, TypeError):
                # Best-effort: an invalid pattern or non-text input yields
                # no result for this combination instead of aborting.
                continue
            # Skip misses and patterns that define no capture group.
            if match is not None and match.re.groups:
                found.append(match.group(1))
    return found
def parseAmount(stringData, dataRgex=('(.*)',)):
    """Extract the token that follows each "total" keyword in ``stringData``.

    The first keyword (in priority order) that occurs anywhere in the text is
    selected, and one value is extracted per occurrence of that keyword. When
    the text after the keyword contains a space, everything up to and
    including the first space is skipped (e.g. ``": "``) and the next
    space/newline-delimited token is taken; otherwise the whole remainder up
    to the first newline is taken.

    Args:
        stringData: Text to scan; ``None`` or empty yields no results.
        dataRgex: Unused; kept for signature compatibility.

    Returns:
        List of extracted tokens, one per keyword occurrence, or an empty
        list when no keyword is present.
    """
    if not stringData:
        return []

    # NOTE(review): 'Total'/'total' are tested before the longer variants and
    # are prefixes of them, so 'TotalAmount', 'Total Amount' and 'totalamount'
    # can never be selected; "Total Amount 500" therefore yields 'Amount'.
    # Order kept as-is to preserve existing results.
    keywords = ('TOTAL', 'Total', 'total',
                'TotalAmount', 'Total Amount', 'totalamount')
    matchKey = next((key for key in keywords if key in stringData), None)
    if matchKey is None:
        # Without this guard the empty key reached str.split and raised
        # "ValueError: empty separator".
        return []

    amounts = []
    for hit in re.finditer(re.escape(matchKey), stringData):
        remainder = stringData[hit.end():]
        parts = remainder.split(' ', 1)
        candidate = parts[1] if len(parts) > 1 else parts[0]
        amounts.append(candidate.split(' ')[0].split('\n')[0])
    return amounts
def parseInvoiceNumber(stringData, dataRgex=('(.*)',)):
    """Extract the token that follows an invoice-number label.

    The first label (in priority order) found in ``stringData`` is used and
    the text is split at its first occurrence. When the text after the label
    contains a space, everything up to and including the first space is
    skipped (e.g. ``": "`` or ``". "``) and the next space/newline-delimited
    token is returned; otherwise the remainder up to the first newline is
    returned.

    Args:
        stringData: Text to scan; ``None`` or empty yields ``""``.
        dataRgex: Unused; kept for signature compatibility.

    Returns:
        The extracted token, or ``""`` when no label is present.
    """
    if not stringData:
        return ""

    # Includes misread spellings ('lnvolce', 'Ilnvoice', 'noviceNo') exactly
    # as the original list had them; order is the matching priority.
    labels = (
        'Ilnvoice No', 'Invoice Number:', 'Invoice No', 'INVOICE NO.',
        'InvoiceNumber', 'Invoice Number', 'InvNo', 'Inv. No.', 'InvoiceNo',
        'Invoice No.', 'lnvolceNo', 'lnvolce No.', 'lnvolceNumber',
        'lnvolce Number', 'noviceNo',
    )
    matchKey = next((label for label in labels if label in stringData), None)
    if matchKey is None:
        # Without this guard the empty key reached str.split and raised
        # "ValueError: empty separator".
        return ""

    remainder = stringData.split(matchKey, 1)[1]
    parts = remainder.split(' ', 1)
    candidate = parts[1] if len(parts) > 1 else parts[0]
    return candidate.split(' ')[0].split('\n')[0]
def parseCurrency(stringData, dataRgex=('(.*)',)):
    """Detect the currency marker that appears after a "total" keyword.

    The text following each "total" keyword is captured with
    ``keyword + pattern`` for every pattern in ``dataRgex``; the captures are
    then checked against the known currency markers in priority order.

    Args:
        stringData: Text to scan; ``None`` or empty yields ``[]``.
        dataRgex: Sequence of regex suffixes, each with at least one group.

    Returns:
        ``[marker]`` for the first marker found in any capture; ``""`` when
        text was captured but holds no known marker; ``[]`` when nothing was
        captured. The two distinct "empty" shapes are preserved on purpose for
        existing callers -- both are falsy.
    """
    keywords = ('TOTAL', 'Total', 'total', 'TotalAmount', 'Total Amount',
                'totalamount', 'total amount', 'totalAmount')
    captured = []
    if stringData:
        for keyword in keywords:
            for suffix in dataRgex:
                try:
                    match = re.search(keyword + suffix, stringData)
                except (re.error, TypeError):
                    # Best-effort: invalid pattern or non-text input.
                    continue
                # Skip misses and patterns that define no capture group.
                if match is not None and match.re.groups:
                    captured.append(match.group(1))

    if not captured:
        return []

    # '1NR' is listed alongside 'INR' in the original marker list.
    for marker in ('$', 'INR', '1NR', 'EUR', 'AUD', 'CAD', 'SGD', 'GBP'):
        # str() guards against None from a group that did not participate.
        if any(marker in str(text) for text in captured):
            return [marker]
    return ""
def getPanNumber(stringData, dataRgex=(
        r'[A-Za-z]{5}\d{4}[A-Za-z]{1}',
        r'[A-Za-z]{4}\d{4}[A-Za-z]{2}',
)):
    """Find PAN-like identifiers in ``stringData``.

    Matches of every pattern in ``dataRgex`` are collected. Previously each
    pattern overwrote the result of the one before it, so only the last
    pattern's matches were returned.

    Args:
        stringData: Text to scan.
        dataRgex: Sequence of regex pattern strings.

    Returns:
        List of matches, grouped by pattern in the order given. If a pattern
        fails (invalid regex, non-text input) the error is printed and the
        matches gathered so far are returned.
    """
    res_find = []
    try:
        for pattern in dataRgex:
            res_find.extend(re.findall(pattern, stringData))
    except (TypeError, re.error) as e:
        # Best-effort: report the problem and return what was collected.
        print(e)
    return res_find
def getAadharNumber(stringData, dataRgex=(r'(?<!\d)\d{4}\s\d{4}\s\d{4}(?!\d)',)):
    """Find 12-digit numbers written as three whitespace-separated groups of four.

    Only the first pattern in ``dataRgex`` is applied. The default pattern
    rejects candidates that are directly preceded or followed by another
    digit.

    Args:
        stringData: Text to scan.
        dataRgex: Sequence whose first element is the regex pattern to use.

    Returns:
        List of matches. If the lookup fails (empty ``dataRgex``, invalid
        regex, non-text input) the error is printed and an empty list is
        returned.
    """
    # Initialised up front so the failure path returns [] instead of raising
    # UnboundLocalError at the return statement.
    res_find = []
    try:
        res_find = re.findall(dataRgex[0], stringData)
    except (TypeError, IndexError, re.error) as e:
        print(e)
    return res_find