我尝试使用Python搜索一特定目录下所有文件中的关键词。代码如下:
#!/usr/bin/python
#encoding:UTF-8
import os
import docx
from docx import *
# Check whether each file contains the keyword; if so, print the file's path.
def is_file_contain_word(file_list, query_word):
    """Print the path of every file in ``file_list`` that contains ``query_word``.

    Args:
        file_list: Iterable of file paths to scan.
        query_word: Substring to look for in each file's text.

    Each file is opened in text mode with the platform's default encoding
    and read in full, so a file that is not valid text in that encoding
    (for example a binary ``.docx``) raises ``UnicodeDecodeError``.
    """
    for _file in file_list:
        # Kept as the original one-liner on purpose: this is the statement
        # that fails in the traceback quoted after the script.
        if query_word in open(_file).read():
            print(_file)
    print("Finish searching.")
# Return every file under the given directory, including files in subdirectories.
def get_all_file(floder_path):
    """Collect the paths of all files below ``floder_path``.

    Args:
        floder_path: Root directory to walk. The misspelled parameter name
            is kept so existing callers keep working.

    Returns:
        A list of file paths, each built by joining the directory reported
        by ``os.walk`` with the file name.

    Raises:
        Exception: If ``floder_path`` is ``None``.
    """
    if floder_path is None:
        raise Exception("floder_path is None")
    # os.path.join inserts the platform's separator; the original
    # hand-written '\' was an unterminated string literal.
    return [
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(floder_path)
        for name in filenames
    ]
# Script driver: ask for the keyword and the root directory, then run the search.
# input() returns the typed text verbatim, so any quotes typed around the
# keyword become part of the search string.
query_word = input("Please input the key word that you want to search:")
basedir = input("Please input the directory:")
is_file_contain_word(get_all_file(basedir), query_word)
# Wait for Enter before the script exits.
input("Press Enter to quit.")
测试的目录为D:\test。内含一个word文档和一个子文件夹,子文件夹下有一个word文档。
输入关键词和目录后,得到如下信息:
Please input the key word that you want to search:'Shengaiwei'
Please input the directory:D:\test
Traceback (most recent call last):
  File "C:\Users\c*\AppData\Local\Programs\Python\Python38\kword\kword7.py", line 29, in <module>
    is_file_contain_word(get_all_file(basedir), query_word)
  File "C:\Users\c*\AppData\Local\Programs\Python\Python38\kword\kword7.py", line 11, in is_file_contain_word
    if query_word in open(_file).read():
UnicodeDecodeError: 'gbk' codec can't decode byte 0xa2 in position 50: illegal multibyte sequence
烦请各位大侠帮助指导,谢谢!
回答
使用docx解析word文档
def is_file_contain_word(file_list, query_word):
    """Print the path of every file in ``file_list`` that contains ``query_word``.

    ``.docx`` files are parsed with ``docx.Document`` and only the text of
    ``doc.paragraphs`` is searched. Every other file is read as text using
    the platform's default encoding.

    Args:
        file_list: Iterable of file paths to scan.
        query_word: Substring to look for.

    Files that cannot be decoded as text in the default encoding are
    skipped instead of aborting the whole search with
    ``UnicodeDecodeError``.
    """
    for _file in file_list:
        extension = os.path.splitext(_file)[1].lower()
        if extension == '.docx':
            # A .docx is a zip container, so it has to be parsed rather
            # than read as plain text.
            doc = docx.Document(_file)
            found = any(
                query_word in paragraph.text for paragraph in doc.paragraphs
            )
        else:
            try:
                # The context manager closes the handle even if read() fails.
                with open(_file) as handle:
                    found = query_word in handle.read()
            except UnicodeDecodeError:
                # Not text in the default encoding (e.g. another binary
                # format); there is nothing searchable here.
                continue
        if found:
            print(_file)
    print("Finish searching.")
发表评论