|
| 1 | +import string, pprint |
| 2 | +import pycrm114 as p |
| 3 | + |
| 4 | +import texts |
# Short excerpts used as classification inputs further down in this script.
# Each fragment is built with implicit string-literal concatenation inside
# parentheses, so no line depends on a trailing backslash and a blank line
# can be added or removed between definitions without changing the parse.

Alice_frag = (
    "So she was considering in her own mind (as well as she could, for the\n"
    "hot day made her feel very sleepy and stupid), whether the pleasure\n"
    "of making a daisy-chain would be worth the trouble of getting up and\n"
    "picking the daisies, when suddenly a White Rabbit with pink eyes ran\n"
    "close by her.\n"
)

Hound_frag = (
    "\"Well, Watson, what do you make of it?\"\n"
    "Holmes was sitting with his back to me, and I had given him no\n"
    "sign of my occupation.\n"
    "\"How did you know what I was doing? I believe you have eyes in\n"
    "the back of your head.\"\n"
)

Macbeth_frag = (
    " Double, double, toil and trouble;\n"
    " Fire, burn; and cauldron, bubble.\n"
    " \n"
    " SECOND WITCH.\n"
    " Fillet of a fenny snake,\n"
    " In the caldron boil and bake;\n"
    " Eye of newt, and toe of frog,\n"
    " Wool of bat, and tongue of dog,\n"
    " Adder's fork, and blind-worm's sting,\n"
    " Lizard's leg, and howlet's wing,--\n"
    " For a charm of powerful trouble,\n"
    " Like a hell-broth boil and bubble.\n"
)

# NOTE: unlike the other fragments, this one has no trailing newline; it is
# kept that way so the text handed to the classifier is unchanged.
Willows_frag = (
    "'This is fine!' he said to himself. 'This is better than whitewashing!'\n"
    "The sunshine struck hot on his fur, soft breezes caressed his heated\n"
    "brow, and after the seclusion of the cellarage he had lived in so long\n"
    "the carol of happy birds fell on his dulled hearing almost like a shout."
)
| 36 | + |
| 37 | + |
# Build the classifier control block: SVM classifier flag combined with the
# STRING flag, and two classes named "Alice" and "Macbeth".
# NOTE(review): the boolean paired with each class name and the meaning of
# start_mem are defined by pycrm114 -- confirm against its documentation.
cb = p.ControlBlock(flags=(p.CRM114_SVM | p.CRM114_STRING),
                    classes=[("Alice", True), ("Macbeth", False)],
                    start_mem=8000000)

# Round-trip the control block through its text dump. Each handle is closed
# by its `with` block, so the dump is fully written to disk before it is read
# back, instead of relying on garbage collection to close the write handle.
with open("test_cb_dump.txt", 'w') as cb_out:
    cb.dump(cb_out)
with open("test_cb_dump.txt", 'r') as cb_in:
    cb = p.ControlBlock.load(cb_in)
| 44 | + |
# Create a data block from the control block and train it on the two full
# texts, then round-trip the trained data block through its text dump.
db = p.DataBlock(cb)

# NOTE(review): the integer passed to learn_text presumably selects the class
# by position (0 for Alice, 1 for Macbeth) -- confirm against pycrm114.
print(" Starting to learn the 'Alice in Wonderland' text")
db.learn_text(0, texts.Alice)

print(" Starting to learn the 'MacBeth' text")
db.learn_text(1, texts.Macbeth)

# The write handle is closed (and therefore flushed) before the file is
# reopened for reading; the original left both handles to the garbage
# collector.
print(" Writing our datablock as 'simple_demo_datablock.txt'.")
with open("simple_demo_datablock.txt", 'w') as db_out:
    db.dump(db_out)

print(" Reading text form back in.")
with open("simple_demo_datablock.txt", 'r') as db_in:
    db = p.DataBlock.load(db_in)
| 58 | + |
def _report_classification(datablock, label, text):
    """Classify *text* with *datablock* and print the result.

    Prints a banner containing *label*, then one summary line built from the
    result's ``best_match()``, ``tsprob()``, ``overall_pR()`` and
    ``unk_features()``, then one line per entry of ``scores()`` showing its
    "documents", "features", "hits", "prob" and "pR" values.

    Returns the result object produced by ``datablock.classify_text(text)``.
    """
    print(" Classifying the '%s' text." % label)
    result = datablock.classify_text(text)
    print("Best match: %s Tot succ prob: %f overall_pR: %f unk_features: %d"
          % (result.best_match(), result.tsprob(), result.overall_pR(),
             result.unk_features()))
    for score in result.scores():
        print("documents: %d features: %d hits: %d prob: %f pR: %f"
              % (score["documents"], score["features"], score["hits"],
                 score["prob"], score["pR"]))
    return result


# Classify each sample fragment in the same order as before. `s` is rebound
# on every pass so that, as in the original script, it ends up holding the
# result of the last classification.
for _label, _fragment in (("Alice", Alice_frag),
                          ("Macbeth", Macbeth_frag),
                          ("Hound", Hound_frag),
                          ("Wind in the Willows", Willows_frag)):
    s = _report_classification(db, _label, _fragment)
0 commit comments