You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
84 lines
2.5 KiB
84 lines
2.5 KiB
import PyPDF2 |
|
import unicodedata |
|
import os |
|
import re |
|
|
|
|
|
def create_subfolder_if_not_exists():
    """Ensure the ``notasProcessadas`` output folder exists in the working directory.

    Does nothing when the folder is already there. Raises ``FileExistsError``
    if the name is taken by something that is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of isdir() + mkdir().
    os.makedirs("notasProcessadas", exist_ok=True)
|
|
|
def extract_num_nota(filename):
    """Return the invoice number encoded at the end of a PDF file name.

    The number is whatever sits between the last underscore and the trailing
    ``.pdf`` extension, e.g. ``"nota_123.pdf"`` -> ``"123"``. When the name
    has no underscore, the whole name without the extension is returned.

    Args:
        filename: File name, normally ending in ``.pdf``.

    Returns:
        The text after the last underscore, without the ``.pdf`` suffix.
    """
    suffix = ".pdf"
    # Strip the extension only when it is really the suffix; searching for the
    # first ".pdf" anywhere mis-slices names such as "a.pdf_12.pdf", and a
    # missing extension must not cost the last character.
    stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    # rfind() returns -1 when there is no underscore, so the slice starts at 0.
    return stem[stem.rfind("_") + 1:]
|
|
|
def is_pdf(filename):
    """Tell whether ``filename`` ends with the lowercase ``.pdf`` extension."""
    return filename.endswith(".pdf")
|
|
|
def filter_interesting_lines(lines_array):
    """Keep only the lines that fall inside a "TOMADOR DE SERVI..." section.

    Collection starts on (and includes) a line containing
    ``"TOMADOR DE SERVI"`` and stops on (and excludes) a line containing
    ``"NOTA FISCAL DE SERVI"``. A section may open again later in the input.

    Args:
        lines_array: Iterable of text lines.

    Returns:
        A new list with the selected lines, in their original order.
    """
    selected = []
    collecting = False
    for text in lines_array:
        # The start marker is tested first, so it wins when a line has both.
        if "TOMADOR DE SERVI" in text:
            collecting = True
        elif "NOTA FISCAL DE SERVI" in text:
            collecting = False

        if collecting:
            selected.append(text)
    return selected
|
|
|
def condition_nome_razao(input_str):
    """Tell whether the line mentions ``NOME/RAZ`` (checked case-insensitively)."""
    return "NOME/RAZ" in input_str.upper()
|
|
|
def format_nome_razao(input_str):
    """Turn a ``NOME/RAZ...: <name>`` line into an upper-case file-name token.

    Steps: take the text after the first colon (the whole line when there is
    no colon), trim it, replace spaces with underscores, drop accents and any
    other non-ASCII characters, replace characters that are not valid inside
    a file name with underscores, upper-case, and cut to 40 characters.

    Args:
        input_str: Line of text holding the name after a colon.

    Returns:
        The normalised name, at most 40 characters long.
    """
    # find() returns -1 when there is no colon, so the slice starts at 0.
    value = input_str[input_str.find(":") + 1:].strip().replace(' ', '_')
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII round-trip then discards the marks.
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    # The result is used as part of a file name: path separators (as in
    # "S/A") and other reserved characters would break the rename.
    value = re.sub(r'[\\/:*?"<>|]', '_', value)
    return value.upper()[:40]
|
|
|
|
|
def condition_cpf_cnpj(input_str):
    """Tell whether the line contains the exact (case-sensitive) text ``CPF/CNPJ``."""
    return "CPF/CNPJ" in input_str
|
|
|
def string_to_numeric(input_str):
    """Return ``input_str`` with every character that is not numeric removed.

    A character is kept when ``str.isnumeric()`` is true for it; the kept
    characters stay in their original order.
    """
    return "".join(symbol for symbol in input_str if symbol.isnumeric())
|
|
|
def format_cpf_cnpj(input_str):
    """Extract a punctuated CNPJ or CPF from the line and return only its digits.

    The CNPJ layout (``NN.NNN.NNN/NNNN-NN``) is searched first; when it is
    absent the CPF layout (``NNN.NNN.NNN-NN``) is searched.

    Args:
        input_str: Line of text expected to contain a CPF or CNPJ.

    Returns:
        The digits of the first layout found, or ``None`` when the line
        contains neither layout.
    """
    layouts = (
        r'\d{2}\.\d{3}\.\d{3}\/\d{4}\-\d{2}',  # CNPJ
        r'\d{3}\.\d{3}\.\d{3}\-\d{2}',  # CPF
    )
    for layout in layouts:
        found = re.search(layout, input_str)
        if found is not None:
            return string_to_numeric(found.group())
    return None
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Rename every PDF in the working directory and move it to ``notasProcessadas``.

    For each file accepted by ``is_pdf``, the text of the first page is split
    into lines and narrowed with ``filter_interesting_lines``. The name and
    the CPF/CNPJ found in those lines, together with the number taken from
    the file name, form the new name
    ``<name>_<cpf-or-cnpj>_<number>.pdf`` inside ``notasProcessadas``.

    A file whose first page does not yield both a name and a CPF/CNPJ is
    left where it is and reported on standard output.
    """
    create_subfolder_if_not_exists()
    dir_processadas = "notasProcessadas"

    for filename in os.listdir():
        if not is_pdf(filename):
            continue

        reader = PyPDF2.PdfReader(filename)
        if len(reader.pages) == 0:
            print(f"Skipping {filename}: the PDF has no pages")
            continue

        page_lines = reader.pages[0].extract_text().split('\n')
        num_nota = extract_num_nota(filename)

        # Reset for every file: otherwise a PDF with no match would reuse the
        # values of the previous PDF (or fail on an unassigned variable).
        nome_razao = None
        cpf_cnpj = None
        for line in filter_interesting_lines(page_lines):
            if condition_nome_razao(line):
                nome_razao = format_nome_razao(line)
            elif condition_cpf_cnpj(line):
                parsed = format_cpf_cnpj(line)
                # Keep an earlier valid value when this line has no number.
                if parsed is not None:
                    cpf_cnpj = parsed

        if nome_razao is None or cpf_cnpj is None:
            print(f"Skipping {filename}: name or CPF/CNPJ not found on the first page")
            continue

        new_filename = nome_razao + "_" + cpf_cnpj + "_" + num_nota + ".pdf"
        os.rename(filename, os.path.join(dir_processadas, new_filename))
|
# Run the renaming only when this file is executed as a script, so that
# importing the module (e.g. to reuse its helpers) does not move any files.
if __name__ == "__main__":
    main()
|
|
|