Keyword Extraction and Stop Word Filtering

  • Share this:

Code introduction


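This function splits the given text into words using a regular expression and returns them in their original order, with common English stop words (such as "the", "and", and "is") filtered out.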


Technology Stack : Regular expressions, set

Code Type : Function

Code Difficulty : Intermediate


                
                    
import os
import re
import json
import random
import math
import html
import datetime

# Stop words removed when the caller does not supply their own list.
# Stored in lower case because matching is done on the lower-cased word.
_DEFAULT_STOP_WORDS = frozenset({'the', 'and', 'is', 'in', 'it', 'of', 'to'})

# A "word" is a maximal run of \w characters (letters, digits, underscore)
# delimited by word boundaries; punctuation and whitespace are discarded.
_WORD_PATTERN = re.compile(r'\b\w+\b')


def extract_keywords(text, stop_words=None):
    """
    Extract the words of a text, leaving out stop words.

    The text is split into words with a regular expression, and every word
    whose lower-cased form is a stop word is dropped. The remaining words
    keep their original casing and order; duplicates are not removed.

    :param text: the input text (a string)
    :param stop_words: optional iterable of words to exclude, compared
        case-insensitively; when ``None`` (the default) a small built-in
        set of English stop words is used
    :return: a list of the words that are not stop words
    """
    if stop_words is None:
        excluded = _DEFAULT_STOP_WORDS
    else:
        # Normalize caller-supplied stop words so the comparison below is
        # case-insensitive, exactly as it is for the built-in set.
        excluded = {word.lower() for word in stop_words}

    return [
        word
        for word in _WORD_PATTERN.findall(text)
        if word.lower() not in excluded
    ]