# You can download this code by clicking the button below.
# This code is now available for download.
# This function extracts keywords from the given text and filters out common stop words.
# Technology stack: regular expressions, set
# Code type: function
# Code difficulty: intermediate
import os
import re
import json
import random
import math
import html
import datetime
# Default stop words; tokens are lowercased before being compared to these.
_DEFAULT_STOP_WORDS = frozenset({'the', 'and', 'is', 'in', 'it', 'of', 'to'})


def extract_keywords(text, stop_words=None):
    """
    Extract keywords from text, dropping common stop words.

    The text is split into tokens, where a token is a maximal run of word
    characters (``\\w``: letters, digits and underscore). Tokens whose
    lowercase form is a stop word are removed. The remaining tokens keep
    their original casing, order and duplicates.

    :param text: the input text to scan
    :param stop_words: optional iterable of words to exclude; matching is
        case-insensitive. When ``None`` (the default), a small built-in
        English list is used: the, and, is, in, it, of, to.
    :return: a list of the tokens that are not stop words
    """
    if stop_words is None:
        excluded = _DEFAULT_STOP_WORDS
    else:
        # Tokens are lowercased before the lookup, so normalize the
        # caller-supplied words the same way to keep matching symmetric.
        excluded = {word.lower() for word in stop_words}

    # \b...\b anchors each match on word boundaries, so punctuation and
    # whitespace act purely as separators and never appear in a token.
    tokens = re.findall(r'\b\w+\b', text)
    return [token for token in tokens if token.lower() not in excluded]