使用Python搜索一特定目录下所有文件中的关键词

我尝试使用Python搜索一特定目录下所有文件中的关键词。代码如下:

#!/usr/bin/python
#encoding:UTF-8

import os
import docx
from docx import *

# Check whether each file contains the keyword; if so, print the file path.
def is_file_contain_word(file_list, query_word):
    """Print the path of every file in *file_list* whose text contains *query_word*.

    Each file is opened in text mode with the platform default encoding and
    read in full, so a file whose bytes are not valid in that encoding
    (for example a .docx document) raises UnicodeDecodeError.

    Args:
        file_list: iterable of file paths to inspect.
        query_word: substring to look for in each file's content.
    """
    for _file in file_list:
        if query_word in open(_file).read():
            print (_file)
    print("Finish searching.")

# Return all files under the given directory (including files in subdirectories).
def get_all_file(floder_path):
    """Collect the paths of all files below *floder_path*, recursively.

    Args:
        floder_path: root directory to walk.

    Returns:
        A list of file paths, each built by joining the directory reported
        by os.walk with the file name.

    Raises:
        Exception: if *floder_path* is None.
    """
    file_list = []
    if floder_path is None:
        raise Exception("floder_path is None")
    for dirpath, dirnames, filenames in os.walk(floder_path):
        for name in filenames:
            # os.path.join picks the right separator for the platform and
            # avoids a doubled separator when dirpath already ends with one.
            file_list.append(os.path.join(dirpath, name))
    return file_list

# Ask for the keyword and the root directory, then search every file below it.
query_word = input("Please input the key word that you want to search:")

basedir = input("Please input the directory:")

is_file_contain_word(get_all_file(basedir), query_word)

# Wait for the user to press Enter before the script ends.
input("Press Enter to quit.")

测试的目录为D:\test。内含一个word文档和一个子文件夹,子文件夹下有一个word文档。

输入关键词和目录后,得到如下信息:
Please input the key word that you want to search:'Shengaiwei'
Please input the directory:D:\test
Traceback (most recent call last):
File "C:\Users\c*\AppData\Local\Programs\Python\Python38\kword\kword7.py", line 29, in
is_file_contain_word(get_all_file(basedir), query_word)
File "C:\Users\c*\AppData\Local\Programs\Python\Python38\kword\kword7.py", line 11, in is_file_contain_word
if query_word in open(_file).read():
UnicodeDecodeError: 'gbk' codec can't decode byte 0xa2 in position 50: illegal multibyte sequence

烦请各位大侠帮助指导,谢谢!

回答

使用docx解析word文档

def is_file_contain_word(file_list, query_word):
    """Print the path of every file in *file_list* that contains *query_word*.

    Files with a ``.docx`` extension (case-insensitive) are parsed with
    python-docx and searched paragraph by paragraph. Every other file is
    read as text.

    Args:
        file_list: iterable of file paths to inspect.
        query_word: substring to look for.
    """
    for _file in file_list:
        extension = os.path.splitext(_file)[1].lower()

        if extension == '.docx':
            found = _docx_contains_word(_file, query_word)
        else:
            # errors="ignore" drops bytes that are invalid in the default
            # encoding, so binary or differently-encoded files no longer
            # abort the whole search with UnicodeDecodeError. The context
            # manager closes the handle, which the bare open() call leaked.
            with open(_file, errors="ignore") as handle:
                found = query_word in handle.read()

        if found:
            print(_file)
    print("Finish searching.")


def _docx_contains_word(path, query_word):
    """Return True if any paragraph of the .docx file at *path* contains *query_word*."""
    # Imported here so that searching non-.docx files works without python-docx.
    import docx

    document = docx.Document(path)
    # any() stops at the first matching paragraph, like the original break.
    return any(query_word in paragraph.text for paragraph in document.paragraphs)