# You can download this code by clicking the button below.
# This code is now available for download.
# This function uses spaCy's EntityRuler component to define custom entity recognition patterns and adds them to the spaCy NLP pipeline.
# Technology Stack: spaCy, EntityRuler
# Code Type: Function
# Code Difficulty: Intermediate
import spacy
from spacy.tokens import Span
from spacy.pipeline import EntityRuler
def create_entity_patterns(entity_patterns):
    """
    Load the "en_core_web_sm" spaCy model and register custom entity patterns.

    An EntityRuler is inserted into the pipeline before the "ner" component and
    populated with the given patterns.

    :param entity_patterns: List of dictionaries, each containing 'label' and 'pattern' keys.
        'pattern' is passed to the ruler unchanged (e.g. an exact phrase string).
    :return: The loaded spaCy pipeline with the entity ruler added.
    :raises KeyError: If an entry is missing the 'label' or 'pattern' key.
    """
    nlp = spacy.load("en_core_web_sm")

    # The ruler consumes plain dicts; EntityRuler has no PATTERN constructor.
    # Rebuilding each dict also validates that both required keys are present.
    patterns = [
        {"label": entry["label"], "pattern": entry["pattern"]}
        for entry in entity_patterns
    ]

    if int(spacy.__version__.split(".")[0]) >= 3:
        # spaCy v3+: add_pipe takes the registered factory name and returns
        # the component instance it created.
        ruler = nlp.add_pipe("entity_ruler", before="ner")
    else:
        # spaCy v2: add_pipe takes the component object itself.
        ruler = EntityRuler(nlp)
        nlp.add_pipe(ruler, before="ner")

    ruler.add_patterns(patterns)
    return nlp
# Demo: build a pipeline with three phrase patterns and run it on a sample text.
# Each (label, phrase) pair becomes one {'label': ..., 'pattern': ...} entry.
entity_patterns = [
    {"label": label, "pattern": phrase}
    for label, phrase in (
        ("ORG", "Microsoft"),
        ("GPE", "New York"),
        ("PERSON", "John Doe"),
    )
]

# Pipeline with the custom entity ruler registered.
nlp = create_entity_patterns(entity_patterns)

# Process the sample sentence with the customised pipeline.
doc = nlp(
    "Microsoft was founded by Bill Gates in New York. John Doe works there."
)