(5) Add initial stanza pipeline results

50aa3ab7 · Almouhannad Hafez · 6593c212 · 50aa3ab7
Commit 50aa3ab7 authored Nov 16, 2024 by Almouhannad Hafez
Hide whitespace changes
Inline Side-by-side

Showing with 67 additions and 0 deletions

5.0.Process_texts_stanza.ipynb 5.0.Process_texts_stanza.ipynb +67 -0

No files found.
--- a/5.0.Process_texts_stanza.ipynb
+++ b/5.0.Process_texts_stanza.ipynb
@@ -113,6 +113,13 @@
    "    loaded_data = pickle.load(f)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ***Load processed texts***"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 7,
@@ -510,6 +517,66 @@
   "source": [
    "print(doc_0.sentences)"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ***Apply new pipeline step***"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "id: 1\tword: I\thead id: 4\thead: experiencing\tdeprel: nsubj\n",
+      "id: 2\tword: have\thead id: 4\thead: experiencing\tdeprel: aux\n",
+      "id: 3\tword: been\thead id: 4\thead: experiencing\tdeprel: aux\n",
+      "id: 4\tword: experiencing\thead id: 0\thead: root\tdeprel: root\n",
+      "id: 5\tword: a\thead id: 7\thead: rash\tdeprel: det\n",
+      "id: 6\tword: skin\thead id: 7\thead: rash\tdeprel: compound\n",
+      "id: 7\tword: rash\thead id: 4\thead: experiencing\tdeprel: obj\n",
+      "id: 8\tword: on\thead id: 10\thead: arms\tdeprel: case\n",
+      "id: 9\tword: my\thead id: 10\thead: arms\tdeprel: nmod:poss\n",
+      "id: 10\tword: arms\thead id: 7\thead: rash\tdeprel: nmod\n",
+      "id: 11\tword: ,\thead id: 12\thead: legs\tdeprel: punct\n",
+      "id: 12\tword: legs\thead id: 10\thead: arms\tdeprel: conj\n",
+      "id: 13\tword: ,\thead id: 15\thead: torso\tdeprel: punct\n",
+      "id: 14\tword: and\thead id: 15\thead: torso\tdeprel: cc\n",
+      "id: 15\tword: torso\thead id: 10\thead: arms\tdeprel: conj\n",
+      "id: 16\tword: for\thead id: 20\thead: weeks\tdeprel: case\n",
+      "id: 17\tword: the\thead id: 20\thead: weeks\tdeprel: det\n",
+      "id: 18\tword: past\thead id: 20\thead: weeks\tdeprel: amod\n",
+      "id: 19\tword: few\thead id: 20\thead: weeks\tdeprel: amod\n",
+      "id: 20\tword: weeks\thead id: 4\thead: experiencing\tdeprel: obl\n",
+      "id: 21\tword: .\thead id: 4\thead: experiencing\tdeprel: punct\n",
+      "id: 1\tword: It\thead id: 3\thead: red\tdeprel: nsubj\n",
+      "id: 2\tword: is\thead id: 3\thead: red\tdeprel: cop\n",
+      "id: 3\tword: red\thead id: 0\thead: root\tdeprel: root\n",
+      "id: 4\tword: ,\thead id: 5\thead: itchy\tdeprel: punct\n",
+      "id: 5\tword: itchy\thead id: 3\thead: red\tdeprel: conj\n",
+      "id: 6\tword: ,\thead id: 8\thead: covered\tdeprel: punct\n",
+      "id: 7\tword: and\thead id: 8\thead: covered\tdeprel: cc\n",
+      "id: 8\tword: covered\thead id: 3\thead: red\tdeprel: conj\n",
+      "id: 9\tword: in\thead id: 13\thead: patches\tdeprel: case\n",
+      "id: 10\tword: dry\thead id: 13\thead: patches\tdeprel: amod\n",
+      "id: 11\tword: ,\thead id: 10\thead: dry\tdeprel: punct\n",
+      "id: 12\tword: scaly\thead id: 13\thead: patches\tdeprel: amod\n",
+      "id: 13\tword: patches\thead id: 8\thead: covered\tdeprel: obl\n",
+      "id: 14\tword: .\thead id: 3\thead: red\tdeprel: punct\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Build a pipeline that loads only the dependency parser; depparse_pretagged=True\n",
+    "# tells stanza to treat the incoming document as already tokenized and tagged.\n",
+    "nlp = stanza.Pipeline(\n",
+    "    lang='en',\n",
+    "    processors='depparse',\n",
+    "    depparse_pretagged=True,\n",
+    ")\n",
+    "doc = nlp(doc_0)\n",
+    "\n",
+    "# Print one tab-separated row per word: id, text, head id, head text, relation.\n",
+    "# A head id of 0 is reported as \"root\"; any other head id is used as a 1-based\n",
+    "# index into the same sentence's word list.\n",
+    "print('\\n'.join(\n",
+    "    f'id: {word.id}\\tword: {word.text}\\t'\n",
+    "    f'head id: {word.head}\\t'\n",
+    "    f'head: {sent.words[word.head-1].text if word.head > 0 else \"root\"}\\t'\n",
+    "    f'deprel: {word.deprel}'\n",
+    "    for sent in doc.sentences\n",
+    "    for word in sent.words\n",
+    "))"
+   ]
  }
 ],
 "metadata": {