"""Zero-shot entity linking example: spaCy pipeline extended with the zshot component."""
import spacy
from zshot import PipelineConfig, displacy
from zshot.linker import LinkerRegen
from zshot.mentions_extractor import MentionsExtractorSpacy
from zshot.utils.data_models import Entity

# Base spaCy pipeline that the zshot component is appended to below.
nlp = spacy.load('en_core_web_sm')

# zero shot definition of entities
# Each Entity pairs a label name with a free-text description; the descriptions
# are runtime input to the pipeline, so they are kept verbatim.
nlp_config = PipelineConfig(
    mentions_extractor=MentionsExtractorSpacy(),
    linker=LinkerRegen(),
    entities=[
        Entity(
            name='Paris',
            description='Paris is located in northern central France, in a north-bending arc of the river Seine',
        ),
        Entity(
            name='IBM',
            description='International Business Machines Corporation (IBM) is an American multinational technology corporation headquartered in Armonk, New York',
        ),
        Entity(
            name='New York',
            description='New York is a city in U.S. state',
        ),
        Entity(
            name='Florida',
            description='southeasternmost U.S. state',
        ),
        Entity(
            name='American',
            description='American, something of, from, or related to the United States of America, commonly known as the United States or America',
        ),
        Entity(
            name='Chemical formula',
            description='In chemistry, a chemical formula is a way of presenting information about the chemical proportions of atoms that constitute a particular chemical compound or molecul',
        ),
        Entity(
            name='Acetamide',
            description='Acetamide (systematic name: ethanamide) is an organic compound with the formula CH3CONH2. It is the simplest amide derived from acetic acid. It finds some use as a plasticizer and as an industrial solvent.',
        ),
        Entity(
            name='Armonk',
            description='Armonk is a hamlet and census-designated place (CDP) in the town of North Castle, located in Westchester County, New York, United States.',
        ),
        Entity(
            name='Acetic Acid',
            description='Acetic acid, systematically named ethanoic acid, is an acidic, colourless liquid and organic compound with the chemical formula CH3COOH',
        ),
        # NOTE(review): this description is identical to the 'Acetamide' entry
        # above -- confirm that is intended rather than a copy/paste slip.
        Entity(
            name='Industrial solvent',
            description='Acetamide (systematic name: ethanamide) is an organic compound with the formula CH3CONH2. It is the simplest amide derived from acetic acid. It finds some use as a plasticizer and as an industrial solvent.',
        ),
    ],
)

# Register the zshot component at the end of the pipeline with the config above.
nlp.add_pipe('zshot', config=nlp_config, last=True)

# Adjacent string literals are concatenated into a single sentence.
text = (
    'International Business Machines Corporation (IBM) is an American multinational technology corporation'
    ' headquartered in Armonk, New York, with operations in over 171 countries.'
)

doc = nlp(text)

# Visualise the entities found in the processed document via zshot's displacy helper.
displacy.serve(doc, style='ent')
Found a mistake or something isn't working?
If you've come across a universe project that isn't working or is incompatible with the reported spaCy version, let us know by opening a discussion thread.
Submit your project
If you have a project that you want the spaCy community to make use of, you can suggest it by submitting a pull request to the spaCy website repository. The Universe database is open-source and collected in a simple JSON file. For more details on the formats and available fields, see the documentation. Looking for inspiration for your own spaCy plugin or extension? Check out the project idea section in Discussions.