|
34 | 34 | },
|
35 | 35 | {
|
36 | 36 | "cell_type": "code",
|
37 |
| - "execution_count": 3, |
| 37 | + "execution_count": 1, |
38 | 38 | "id": "c1032770",
|
39 | 39 | "metadata": {},
|
40 | 40 | "outputs": [
|
|
75 | 75 | },
|
76 | 76 | {
|
77 | 77 | "cell_type": "code",
|
78 |
| - "execution_count": 16, |
| 78 | + "execution_count": 10, |
79 | 79 | "id": "9310b74e",
|
80 | 80 | "metadata": {},
|
81 | 81 | "outputs": [],
|
|
357 | 357 | },
|
358 | 358 | {
|
359 | 359 | "cell_type": "code",
|
360 |
| - "execution_count": 22, |
| 360 | + "execution_count": 19, |
361 | 361 | "id": "0d713766",
|
362 | 362 | "metadata": {},
|
363 | 363 | "outputs": [
|
364 | 364 | {
|
365 | 365 | "name": "stdout",
|
366 | 366 | "output_type": "stream",
|
367 | 367 | "text": [
|
368 |
| - "15659252742081136678 simple_phrase 42 45 Auf dem Bild\n", |
369 |
| - "15659252742081136678 simple_phrase 48 51 mit den Rapunzeln\n", |
370 |
| - "15659252742081136678 simple_phrase 104 107 an einem Feld\n", |
371 |
| - "15659252742081136678 simple_phrase 121 124 um das Feld\n", |
372 |
| - "15659252742081136678 simple_phrase 129 132 auf das Feld\n", |
373 |
| - "15659252742081136678 simple_phrase 146 149 vor der Zauberin\n", |
374 |
| - "15659252742081136678 simple_phrase 166 169 Auf dem Feld\n", |
375 |
| - "15659252742081136678 simple_phrase 169 172 von der Zauberin\n", |
376 |
| - "15659252742081136678 simple_phrase 188 191 von den Rapunzeln\n", |
377 |
| - "15659252742081136678 simple_phrase 197 200 von den Rapunzeln\n", |
378 |
| - "15659252742081136678 simple_phrase 208 211 zu ihrem Mann\n", |
379 |
| - "15659252742081136678 simple_phrase 239 242 Auf dem Bild\n", |
380 |
| - "15659252742081136678 simple_phrase 282 285 mit den Rapunzeln\n", |
381 |
| - "15659252742081136678 simple_phrase 294 297 auf das Feld\n", |
382 |
| - "15659252742081136678 simple_phrase 311 314 auf die Nacht\n", |
383 |
| - "15659252742081136678 simple_phrase 341 344 über die Mauer\n", |
384 |
| - "15659252742081136678 simple_phrase 348 351 auf das Feld\n", |
385 |
| - "15659252742081136678 simple_phrase 384 387 von den Rapunzeln\n", |
386 |
| - "15659252742081136678 simple_phrase 394 397 zu ihrem Mann\n", |
387 |
| - "15659252742081136678 simple_phrase 454 457 über die Mauer\n", |
388 |
| - "15659252742081136678 simple_phrase 461 464 auf das Feld\n", |
389 |
| - "15659252742081136678 simple_phrase 478 481 vor dem Mann\n", |
390 |
| - "15659252742081136678 simple_phrase 530 533 Auf dem Bild\n", |
391 |
| - "15659252742081136678 simple_phrase 609 612 von meinen Rapunzeln\n", |
392 |
| - "15659252742081136678 simple_phrase 645 648 für die Rapunzeln\n", |
393 |
| - "15659252742081136678 simple_phrase 712 715 in der Tür\n", |
394 |
| - "15659252742081136678 simple_phrase 740 743 in einen Turm\n", |
395 |
| - "15659252742081136678 simple_phrase 771 774 in dem Turm\n", |
396 |
| - "15659252742081136678 simple_phrase 807 810 auf die Erde\n", |
397 |
| - "15659252742081136678 simple_phrase 853 856 Auf dem Bild\n", |
398 |
| - "15659252742081136678 simple_phrase 866 869 in den Turm\n", |
399 |
| - "15659252742081136678 simple_phrase 874 877 in dem Turm\n", |
400 |
| - "15659252742081136678 simple_phrase 921 924 zu einem Zopf\n", |
401 |
| - "15659252742081136678 simple_phrase 954 957 An einem Tag\n", |
402 |
| - "15659252742081136678 simple_phrase 960 963 durch den Wald\n", |
403 |
| - "15659252742081136678 simple_phrase 968 971 an dem Turm\n", |
404 |
| - "15659252742081136678 simple_phrase 1007 1010 in den Turm\n", |
405 |
| - "15659252742081136678 simple_phrase 1032 1035 auf seinem Pferd\n", |
406 |
| - "15659252742081136678 simple_phrase 1049 1052 von dem Mädchen\n", |
407 |
| - "15659252742081136678 simple_phrase 1064 1067 zu dem Turm\n", |
408 |
| - "15659252742081136678 simple_phrase 1076 1079 von dem Mädchen\n", |
409 |
| - "15659252742081136678 simple_phrase 1087 1090 hinter einem Baum\n", |
410 |
| - "15659252742081136678 simple_phrase 1149 1152 in den Turm\n", |
411 |
| - "15659252742081136678 simple_phrase 1206 1209 Auf dem Bild\n", |
412 |
| - "15659252742081136678 simple_phrase 1298 1301 vor dem Prinzen\n", |
413 |
| - "15659252742081136678 simple_phrase 1352 1355 aus dem Turm\n", |
414 |
| - "15659252742081136678 simple_phrase 1379 1382 Aus dem Stoff\n", |
415 |
| - "15659252742081136678 simple_phrase 1413 1416 in mein Schloss\n", |
416 |
| - "15659252742081136678 simple_phrase 1444 1447 von dem Prinzen\n", |
417 |
| - "15659252742081136678 simple_phrase 1457 1460 An einem Abend\n", |
418 |
| - "15659252742081136678 simple_phrase 1468 1471 zu der Zauberin\n", |
419 |
| - "15659252742081136678 simple_phrase 1566 1569 An diesem Ort\n", |
420 |
| - "15659252742081136678 simple_phrase 1700 1703 vor der Zauberin\n", |
421 |
| - "15659252742081136678 simple_phrase 1709 1712 aus dem Turm∙fenster\n", |
422 |
| - "15659252742081136678 simple_phrase 1751 1754 durch den Wald\n", |
423 |
| - "15659252742081136678 simple_phrase 1774 1777 Nach vielen Jahren\n", |
424 |
| - "15659252742081136678 simple_phrase 1821 1824 auf die Stimme\n", |
425 |
| - "15659252742081136678 simple_phrase 1850 1853 Auf dem Bild\n", |
426 |
| - "15659252742081136678 simple_phrase 1861 1864 von dem Prinzen\n", |
427 |
| - "15659252742081136678 simple_phrase 1880 1883 auf den Prinzen\n", |
428 |
| - "15659252742081136678 simple_phrase 1943 1946 von dem Prinzen\n", |
429 |
| - "15659252742081136678 simple_phrase 1963 1966 auf sein Schloss\n" |
| 368 | + "wissen von den Rapunzeln\n", |
| 369 | + "ART\n", |
| 370 | + "klettert über die Mauer\n", |
| 371 | + "ART\n", |
| 372 | + "geht auf das Feld\n", |
| 373 | + "ART\n", |
| 374 | + "geht auf das Feld\n", |
| 375 | + "ART\n", |
| 376 | + "wächst in dem Turm\n", |
| 377 | + "ART\n", |
| 378 | + "reitet an dem Turm\n", |
| 379 | + "ART\n", |
| 380 | + "sagt zu der Zauberin\n", |
| 381 | + "ART\n", |
| 382 | + "geht auf die Stimme\n", |
| 383 | + "ART\n", |
| 384 | + "läuft auf den Prinzen\n", |
| 385 | + "ART\n" |
430 | 386 | ]
|
431 | 387 | }
|
432 | 388 | ],
|
433 | 389 | "source": [
|
434 | 390 | "import spacy\n",
|
435 | 391 | "from spacy.matcher import Matcher\n",
|
| 392 | + "from spacy.tokens import Span\n", |
436 | 393 | "\n",
|
437 | 394 | "# load model\n",
|
438 | 395 | "nlp = spacy.load(\"de_core_news_sm\")\n",
|
|
444 | 401 | "# pattern = [{\"POS\": \"DET\"}, {\"POS\": \"NOUN\"}, {\"POS\": \"VERB\"}, {\"POS\": \"ADP\"}, {\"POS\": \"DET\"}, {\"POS\": \"NOUN\"}]\n",
|
445 | 402 | "# pattern = [{\"POS\": \"DET\"}, {\"POS\": \"NOUN\"}, {\"POS\": \"VERB\"}, {\"POS\": \"ADV\"}, {\"POS\": \"PRON\"}]\n",
|
446 | 403 | "# pattern = [{\"POS\": \"ADV\"}, {\"POS\": \"VERB\"}, {\"POS\": \"DET\"}, {\"POS\": \"NOUN\"}, {\"POS\": \"PRON\"}]\n",
|
447 |
| - "pattern = [{\"POS\": \"ADP\"}, {\"POS\": \"DET\"}, {\"POS\": \"NOUN\"}]\n", |
| 404 | + "pattern = [{\"POS\": \"VERB\"}, {\"POS\": \"ADP\"}, {\"POS\": \"DET\"}, {\"POS\": \"NOUN\"}]\n", |
448 | 405 | "matcher.add(\"simple_phrase\", [pattern])\n",
|
449 | 406 | "\n",
|
450 | 407 | "doc = nlp(text)\n",
|
451 |
| - "matches = matcher(doc)\n", |
452 |
| - "# print(matches)\n", |
453 |
| - "for match_id, start, end in matches:\n", |
454 |
| - " string_id = nlp.vocab.strings[match_id] # Get string representation\n", |
455 |
| - " span = doc[start:end] # The matched span\n", |
456 |
| - " print(match_id, string_id, start, end, span.text)" |
| 408 | + "matches = matcher(doc, as_spans=True)\n", |
| 409 | + "for span in matches:\n", |
| 410 | + " print(span)\n", |
| 411 | + " print(span[2].tag_)\n", |
| 412 | + " " |
457 | 413 | ]
|
458 | 414 | },
|
459 | 415 | {
|
460 | 416 | "cell_type": "code",
|
461 | 417 | "execution_count": null,
|
462 |
| - "id": "399b1151", |
| 418 | + "id": "8adb005f", |
463 | 419 | "metadata": {},
|
464 | 420 | "outputs": [],
|
465 | 421 | "source": []
|
|
0 commit comments