{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "f8b69641-6835-41ed-8d64-fcdf89c6e8d7", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Evaluating: 100%|██████████| 4/4 [00:00<00:00, 6.28it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countcleanrunmicromacrotrain_timepred_time
min_countmin_countprecisionrecallfscoreprecisionrecallfscore
033baseline0.4805050.4805050.4805050.4280870.4268920.426428285.0922490.577610
133model0.4850920.4850920.4850920.4324140.4314570.430959285.0922492.223516
244baseline0.4782110.4782110.4782110.4270170.4258190.425354250.6299571.069185
344model0.4799310.4799310.4799310.4289270.4272990.427088250.6299571.801056
\n", "
" ], "text/plain": [ " count clean run micro macro \n", " min_count min_count precision recall fscore precision \n", "0 3 3 baseline 0.480505 0.480505 0.480505 0.428087 \\\n", "1 3 3 model 0.485092 0.485092 0.485092 0.432414 \n", "2 4 4 baseline 0.478211 0.478211 0.478211 0.427017 \n", "3 4 4 model 0.479931 0.479931 0.479931 0.428927 \n", "\n", " train_time pred_time \n", " recall fscore \n", "0 0.426892 0.426428 285.092249 0.577610 \n", "1 0.431457 0.430959 285.092249 2.223516 \n", "2 0.425819 0.425354 250.629957 1.069185 \n", "3 0.427299 0.427088 250.629957 1.801056 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from minimel import evaluate\n", "from minimel.experiment import get_dir_params\n", "import pandas as pd\n", "import pathlib\n", "\n", "fpreds = sorted(pathlib.Path(\"../data/wiki/simplewiki-20211120/\").rglob(\"run*.tsv\"))\n", "fgold = pathlib.Path(\"../data/mewsli-en-small.tsv\")\n", "\n", "scores = evaluate(fgold, *fpreds)\n", "\n", "dir_params = pd.DataFrame.from_records([\n", " {p.split(\"__\")[0]: dict(get_dir_params(pathlib.Path(p))) for p in i.parts if \"__\" in p}\n", " for i in scores.index\n", "])\n", "dir_params = pd.concat(\n", " {k: pd.DataFrame.from_records(v) for k, v in dir_params.items()}, axis=1\n", ")\n", "dir_params[\"run\"] = dir_params[\"run\"].str.replace(\".tsv\", \"\")\n", "\n", "train_logs = [f.parent.parent / \"time.log\" for f in fpreds]\n", "scores[\"train_time\"] = [float(open(f).readlines()[-1].split()[-1]) for f in train_logs]\n", "pred_logs = [f.parent / f.name.replace(\"run___\", \"\").replace(\".tsv\", \"-time.log\") for f in fpreds]\n", "scores[\"pred_time\"] = [float(open(f).readlines()[-1].split()[-1]) for f in pred_logs]\n", "\n", "score_table = dir_params.join(scores.reset_index(drop=True))\n", "score_table = score_table.drop(\n", " columns=[c for c, v in score_table.items() if len(set(v)) == 1 and 'time' not in c]\n", ")\n", "score_table" ] }, { "cell_type": "code", "execution_count": 3, "id": "210ba875-e34f-4068-b689-857c28bcfd16", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\\begin{tabular}{lrrlrrrrrrrr}\n", "\\toprule\n", " & count & clean & run & \\multicolumn{3}{r}{micro} & \\multicolumn{3}{r}{macro} & train_time & pred_time \\\\\n", " & min_count & min_count & & precision & recall & fscore & precision & recall & fscore & & \\\\\n", "\\midrule\n", "0 & 3 & 3 & baseline & 0.480505 & 0.480505 & 0.480505 & 0.428087 & 0.426892 & 0.426428 & 285.092249 & 0.577610 \\\\\n", "1 & 3 & 3 & model & 0.485092 & 0.485092 & 0.485092 & 0.432414 & 0.431457 & 0.430959 & 285.092249 & 2.223516 \\\\\n", "2 & 4 & 4 & baseline & 0.478211 & 0.478211 & 0.478211 & 0.427017 & 0.425819 & 0.425354 & 250.629957 & 1.069185 \\\\\n", "3 & 4 & 4 & model & 0.479931 & 0.479931 & 0.479931 & 0.428927 & 0.427299 & 0.427088 & 250.629957 & 1.801056 \\\\\n", "\\bottomrule\n", "\\end{tabular}\n", "\n" ] } ], "source": [ "print(score_table.to_latex())" ] }, { "cell_type": "code", "execution_count": null, "id": "cdca97f0-11b8-40f8-98ec-bbc9db4a013f", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }