過去にソースコード類似調査等でよく使いました。
テキストファイルを正規表現でgrepし、その結果をExcelファイルに出力できます。
greptoexcel.py
import sys
import os.path
import re
import openpyxl
from apl.io.ioutil import *
from apl.io.simple_config import *
from typing import Dict,List
from apl.excollections.elist import Enumerable as Elist
from openpyxl.utils import get_column_letter
# --- Configuration keys expected in the config file -------------------------
CFG_KEY_SUMMARY_ONLY = "SUMMARY_ONLY"
CFG_KEY_ENCODING = "ENCODING"
CFG_KEY_ROOT_DIR = "ROOT_DIR"
CFG_KEY_FILE_FILTER = "FILE_FILTER"
CFG_KEY_OUTPUT_DIR = "OUTPUT_DIR"
# Regex entries are named with this prefix plus a two-digit index (REG01, ...).
CFG_KEY_REG_PREFIX = "REG"

# --- Accepted values for SUMMARY_ONLY ----------------------------------------
CFG_VAL_SUM_ONLY = "1"
CFG_VAL_SUM_NOT_ONLY = "0"

# --- Output workbook settings ------------------------------------------------
SAVE_FILE_NAME = "result.xlsx"
SUMMARY_SHEET_NAME = "#SUMMARY#"
# Marker written into a cell when a pattern matched.
HIT_VALUE = "〇"
# Budget: up to 1000 sheets (4 digits of suffix) within a 30-character sheet
# name leaves 26 characters for the base name.
MAX_SHEET_NAME_LEN = 26
def check_cfg_key(cfg: Dict[str, str]) -> None:
    """Validate the mandatory entries of the loaded configuration.

    Each required key must be present and carry a non-empty value. The output
    and root directories are then passed to ``dir_exists_with_exp`` (an
    imported helper; presumably it raises when the directory is missing --
    confirm against its definition). Finally the summary-only flag must be one
    of the two accepted values.

    Args:
        cfg: Mapping of configuration keys to their string values.

    Raises:
        Exception: If a required key is missing, its value is empty, or the
            summary-only flag holds an unexpected value.
    """
    required = (
        CFG_KEY_SUMMARY_ONLY,
        CFG_KEY_ENCODING,
        CFG_KEY_FILE_FILTER,
        CFG_KEY_OUTPUT_DIR,
        CFG_KEY_ROOT_DIR,
    )
    for name in required:
        if name not in cfg:
            raise Exception("keyが存在しません:" + name)
        value = cfg[name]
        if len(value) == 0:
            raise Exception("値が設定されてません:" + name)

    # Directory checks run in the same order as before: output first, then root.
    for dir_key in (CFG_KEY_OUTPUT_DIR, CFG_KEY_ROOT_DIR):
        dir_exists_with_exp(cfg[dir_key])

    flag = cfg[CFG_KEY_SUMMARY_ONLY]
    if flag not in (CFG_VAL_SUM_NOT_ONLY, CFG_VAL_SUM_ONLY):
        raise Exception("値が不正:" + CFG_KEY_SUMMARY_ONLY)
def check_reg_key(cfg: Dict[str, str]) -> List[str]:
    """Collect the regex configuration keys that are actually in use.

    Keys ``<prefix>01`` through ``<prefix>05`` are probed; a key counts as in
    use when it exists in *cfg* and its value is non-empty.

    Args:
        cfg: Mapping of configuration keys to their string values.

    Returns:
        The in-use regex keys, in ascending index order.

    Raises:
        Exception: If none of the probed keys has a non-empty value.
    """
    candidates = (f"{CFG_KEY_REG_PREFIX}{index:02d}" for index in range(1, 6))
    active = [name for name in candidates if name in cfg and len(cfg[name]) != 0]
    if not active:
        raise Exception("有効な定義なし:" + CFG_KEY_REG_PREFIX)
    return active
def create_sheet_name(wb: openpyxl.Workbook, fullpath: str) -> str:
    """Derive a sheet title for *fullpath* that is not yet used in *wb*.

    The title is the file's base name with characters that are not allowed in
    a worksheet title replaced by ``_``, truncated to ``MAX_SHEET_NAME_LEN``.
    If that title is already taken, a numeric suffix from 1 to 999 is appended
    until a free one is found.

    Args:
        wb: Workbook whose existing sheet names must be avoided.
        fullpath: Path of the file the sheet will represent.

    Returns:
        A sheet title not present in ``wb.sheetnames``, or the literal
        ``"dummey"`` when every suffix from 1 to 999 is already taken.
    """
    # Worksheet titles may not contain \ / * ? : [ ]; openpyxl raises
    # ValueError on them, and file names such as "[id].vue" do contain them.
    safe_name = re.sub(r"[\\/*?:\[\]]", "_", os.path.basename(fullpath))
    base = safe_name[:MAX_SHEET_NAME_LEN]

    # Snapshot once: wb.sheetnames builds a new list on every access.
    taken = set(wb.sheetnames)
    if base not in taken:
        return base
    for suffix in range(1, 1000):
        candidate = base + str(suffix)
        if candidate not in taken:
            return candidate
    return "dummey"
def get_hit_value(f: bool) -> str:
    """Return the hit marker when *f* is truthy, otherwise an empty string."""
    return HIT_VALUE if f else ""
def get_hyperlink(f: bool, file: str, sn: str, r: int, c: int) -> str:
    """Build an in-workbook hyperlink target pointing at one cell.

    Args:
        f: Whether a link should be produced at all.
        file: Workbook file name. Unused; kept so existing callers keep working.
        sn: Title of the sheet to link to.
        r: 1-based row number of the target cell.
        c: 1-based column number of the target cell.

    Returns:
        ``#'<sheet>'!<column letter><row>`` when *f* is truthy, otherwise an
        empty string.
    """
    if not f:
        return ""
    # Sheet titles come from file names and may contain spaces, hyphens or
    # apostrophes. A cell reference needs such names wrapped in single quotes,
    # with embedded apostrophes doubled; quoting is also valid for plain names.
    quoted_sheet = "'" + sn.replace("'", "''") + "'"
    return f"#{quoted_sheet}!{get_column_letter(c)}{r}"
def _set_text(cell, text: str) -> None:
    """Store *text* in *cell* as a literal string, never as a formula."""
    cell.value = text
    # openpyxl classifies a string starting with "=" as a formula. Greped
    # lines such as "=======" would then yield a workbook Excel reports as
    # damaged, so the cell type is forced back to string.
    cell.data_type = "s"


def create_excel(cfg: Dict[str, str], fs: List[str], keylist: List[str]):
    """Grep every file in *fs* and write the result workbook.

    The workbook gets a summary sheet with one row per line that matched at
    least one pattern. Unless the summary-only flag is set, it also gets one
    sheet per file listing every line with a hit marker per pattern; summary
    cells then link to the corresponding cell of the per-file sheet.

    Args:
        cfg: Configuration mapping; supplies the encoding, the summary-only
            flag, the output directory and the regex pattern for each key in
            *keylist*.
        fs: Paths of the files to search.
        keylist: Configuration keys whose values are the regex patterns, in
            column order.

    Raises:
        re.error: If one of the configured patterns is not a valid regex.
            Patterns are compiled up front, so this is raised before any file
            is read.
    """
    summary_only = cfg[CFG_KEY_SUMMARY_ONLY] == CFG_VAL_SUM_ONLY
    # Compile once instead of looking the pattern up for every line.
    patterns = [re.compile(cfg[key]) for key in keylist]

    wb = openpyxl.Workbook()
    sws = wb.worksheets[0]
    sws.title = SUMMARY_SHEET_NAME

    # Summary header: file, sheet, then one column per pattern.
    crnt_sum_row = 1
    sws.cell(row=crnt_sum_row, column=1).value = "file"
    sws.cell(row=crnt_sum_row, column=2).value = "sheet"
    for i, key in enumerate(keylist):
        _set_text(sws.cell(row=crnt_sum_row, column=3 + i), cfg[key])
    crnt_sum_row += 1

    total = len(fs)
    for fc, f in enumerate(fs, start=1):
        print(str(fc) + "/" + str(total) + ":" + f)
        with open(f, mode="r", encoding=cfg[CFG_KEY_ENCODING]) as fp:
            # The per-file sheet is created only after the file opened successfully.
            iws = None if summary_only else wb.create_sheet(title=create_sheet_name(wb, f))

            # Per-file sheet header (row 1): one column per pattern, then the line text.
            if iws is not None:
                for c, key in enumerate(keylist, start=1):
                    _set_text(iws.cell(row=1, column=c), cfg[key])
                iws.cell(row=1, column=len(keylist) + 1).value = "line"

            # Row 1 is the header, so the first file line lands on row 2.
            for line_count, line in enumerate(fp, start=2):
                line = line.rstrip('\r\n')
                results = [pattern.search(line) is not None for pattern in patterns]

                # Summary sheet: only lines with at least one hit.
                if any(results):
                    sws.cell(row=crnt_sum_row, column=1).value = f
                    if iws is not None:
                        sheet_cell = sws.cell(row=crnt_sum_row, column=2)
                        sheet_cell.hyperlink = get_hyperlink(True, SAVE_FILE_NAME, iws.title, 1, 1)
                        sheet_cell.value = iws.title
                    for i, hit in enumerate(results):
                        if not hit:
                            continue
                        hit_cell = sws.cell(row=crnt_sum_row, column=3 + i)
                        if iws is not None:
                            hit_cell.hyperlink = get_hyperlink(
                                hit, SAVE_FILE_NAME, iws.title, line_count, 1 + i
                            )
                        # Set the value after the hyperlink so the marker is what is shown.
                        hit_cell.value = get_hit_value(hit)
                    _set_text(sws.cell(row=crnt_sum_row, column=3 + len(results)), line)
                    crnt_sum_row += 1

                # Per-file sheet: every line, hit or not.
                if iws is not None:
                    for i, hit in enumerate(results):
                        iws.cell(row=line_count, column=i + 1).value = get_hit_value(hit)
                    _set_text(iws.cell(row=line_count, column=len(results) + 1), line)

    wb.save(os.path.join(cfg[CFG_KEY_OUTPUT_DIR], SAVE_FILE_NAME))
if __name__ == '__main__':
    # Exactly one argument is expected: the path of the configuration file.
    if len(sys.argv) != 2:
        raise Exception("引数不正:python greptoexcel.py config_file")
    config_path = sys.argv[1]
    file_exists_with_exp(config_path)

    # Load and validate the configuration before touching any target file.
    settings = create_config(config_path, "utf-8")
    check_cfg_key(settings)
    regex_keys = check_reg_key(settings)

    # Collect the target files under the root directory, then build the workbook.
    search_root = settings[CFG_KEY_ROOT_DIR]
    name_filter = PathFilter.regex(settings[CFG_KEY_FILE_FILTER])
    targets = find_file_recur(search_root, name_filter)
    create_excel(settings, targets, regex_keys)
grep.cfg
SUMMARY_ONLY=0
ENCODING=utf-8
ROOT_DIR=
FILE_FILTER=^.*\.vue$
OUTPUT_DIR=C:\mysoft\py\apl\tools\output
REG01=in
REG02=out
REG03=if
REG04=