# You can download this code by clicking the button below.
# This code is now available for download.
# Description: extracts keywords from the given text, filtering out common English words.
# Technology stack: os, re
# Code type: function
# Code difficulty: intermediate
import os
import re
# Word-like tokens: maximal runs of letters, digits and underscores.
_WORD_RE = re.compile(r'\b\w+\b')

# Common English words that are never reported as keywords. Stored lowercase
# and de-duplicated; built once at import time rather than on every call.
_STOP_WORDS = frozenset({
    # articles, conjunctions and other function words
    'the', 'and', 'is', 'in', 'to', 'a', 'of', 'for', 'on', 'with', 'as',
    'by', 'that', 'it', 'this', 'are', 'be', 'at', 'or', 'from', 'have',
    'has', 'an', 'which', 'not', 'but', 'their', 'can', 'if', 'will', 'all',
    'any', 'such', 'out', 'may', 'one', 'other', 'into', 'more', 'most',
    'very', 'also', 'these', 'some', 'like', 'no',
    # pronouns and possessives
    'my', 'our', 'his', 'her', 'its', 'myself', 'yourself', 'himself',
    'herself', 'itself', 'themselves', 'us', 'we', 'you', 'he', 'she',
    'they', 'them',
    # auxiliary verbs
    'am', 'was', 'were', 'being', 'been', 'had', 'having', 'do', 'does',
    'did', 'doing',
    # subordinators, prepositions and adverbs
    'because', 'until', 'while', 'about', 'against', 'between', 'through',
    'during', 'before', 'after', 'above', 'below', 'up', 'down', 'off',
    'over', 'under', 'again', 'further', 'then', 'once',
})


def extract_keywords(text):
    """
    Extract keywords from a piece of text.

    The text is split into word-like tokens (runs of letters, digits and
    underscores) and every token whose lowercase form is a common English
    stop word is dropped.

    :param text: the text to extract keywords from; an empty string or
        ``None`` yields an empty list
    :return: a list of the remaining tokens, in their original order and
        casing; repeated words are kept
    """
    # Nothing to tokenize; also avoids a TypeError from the regex on None.
    if not text:
        return []

    # Compare case-insensitively, but return each token exactly as written.
    return [
        token
        for token in _WORD_RE.findall(text)
        if token.lower() not in _STOP_WORDS
    ]