MITIE 实体抽取例子

尝试使用mitie进行实体抽取,先记录一下,后续改为java版本的。

  1. import mitie
    model_file='/home/test/rasa_nlu_chi/data/total_word_feature_extractor_zh.dat'
    extractor = mitie.total_word_feature_extractor(model_file)
    feats = extractor.get_feature_vector("我")
    print ("First 5 features of word 'home'", feats[0:])

  2. import sys, os
    from mitie import *
    sample = ner_training_instance(["I", "am", "looking", "for", "some", "cheap", "Mexican", "food", "."])

sample.add_entity(xrange(5,6), "pricerange")
sample.add_entity(xrange(6,7), "cuisine")

sample2 = ner_training_instance(["show", "me", "indian", "restaurants", "in", "the", "centre", "."])
sample2.add_entity(xrange(2,3), "cuisine")
sample2.add_entity(xrange(6,7), "area")

trainer = ner_trainer("/home/test/rasa_nlu_chi/data/total_word_feature_extractor_zh.dat")

trainer.add(sample)
trainer.add(sample2)

trainer.num_threads = 4

ner = trainer.train()

ner.save_to_disk("new_ner_model.dat")

tokens = ["I", "want", "expensive", "korean", "food"]
entities = ner.extract_entities(tokens)

for e in entities:
    range = e[0]
    tag = e[1]
    entity_text = " ".join(tokens[i] for i in range)
    print(" " + tag + ": " + entity_text)

  1. from mitie import *
    ner = mitie.named_entity_extractor("/home/test/rasa_nlu_chi/models/default/model_20200522-171449/entity_extractor.dat")
    tokens = ["两居室", "我的", "三居室"]
    print("Tokenized input:", tokens)
    model_file='/home/test/rasa_nlu_chi/data/total_word_feature_extractor_zh.dat'
    extractor = mitie.total_word_feature_extractor(model_file)
    entities = ner.extract_entities(tokens,extractor)
    print("\nEntities found:", entities)
    print("\nNumber of entities detected:", len(entities))
    for e in entities:
        range = e[0]
        tag = e[1]
        entity_text = " ".join(tokens[i] for i in range)
        print(" " + tag + ": " + entity_text)