<no title>

[2]:

from minimel import evaluate
from minimel.experiment import get_dir_params
import pandas as pd
import pathlib

def _last_logged_seconds(log_path):
    """Return the last whitespace-separated token of a log file as a float.

    The file is read through ``pathlib`` so the handle is closed right away
    (the previous ``open(f).readlines()`` form never closed it). Trailing
    blank lines are tolerated because the whole file is tokenised.

    Raises:
        ValueError: if the file holds no tokens, or the last one is not a number.
    """
    tokens = pathlib.Path(log_path).read_text().split()
    if not tokens:
        raise ValueError(f"no timing value found in {log_path}")
    return float(tokens[-1])


def _is_time_column(column_key):
    """Tell whether any level of a column key contains the substring 'time'.

    Column keys of the joined table are tuples (MultiIndex columns), so a plain
    ``'time' in key`` would test tuple membership rather than a substring match.
    Plain string keys are handled as well.
    """
    levels = column_key if isinstance(column_key, tuple) else (column_key,)
    return any("time" in str(level) for level in levels)


# Prediction files of every run below the experiment directory, and the gold file.
fpreds = sorted(pathlib.Path("../data/wiki/simplewiki-20211120/").rglob("run*.tsv"))
fgold = pathlib.Path("../data/mewsli-en-small.tsv")

scores = evaluate(fgold, *fpreds)

# One record per scored run: every path component containing "__" contributes
# an entry keyed by the text before the first "__".
# NOTE(review): assumes the entries of scores.index are path-like (they expose
# `.parts`) and that get_dir_params yields key/value pairs -- confirm in minimel.
dir_params = pd.DataFrame.from_records([
    {p.split("__")[0]: dict(get_dir_params(pathlib.Path(p))) for p in i.parts if "__" in p}
    for i in scores.index
])
# Expand each column of dicts into its own frame; the dict keys of the concat
# become the top level of the resulting two-level column index.
dir_params = pd.concat(
    {k: pd.DataFrame.from_records(v) for k, v in dir_params.items()}, axis=1
)
# regex=False: ".tsv" is a literal suffix here, not a pattern in which "."
# matches any character (the default differs between pandas versions).
dir_params["run"] = dir_params["run"].str.replace(".tsv", "", regex=False)

# Timings are assigned by position.
# NOTE(review): this relies on the rows of `scores` following the order of
# `fpreds` -- confirm against minimel.evaluate.
train_logs = [f.parent.parent / "time.log" for f in fpreds]
scores["train_time"] = [_last_logged_seconds(f) for f in train_logs]
pred_logs = [f.parent / f.name.replace("run___", "").replace(".tsv", "-time.log") for f in fpreds]
scores["pred_time"] = [_last_logged_seconds(f) for f in pred_logs]

score_table = dir_params.join(scores.reset_index(drop=True))
# Drop columns whose value is the same in every row, but always keep timings.
score_table = score_table.drop(
    columns=[c for c, v in score_table.items() if len(set(v)) == 1 and not _is_time_column(c)]
)
score_table

Evaluating: 100%|██████████| 4/4 [00:00<00:00,  6.28it/s]

[2]:

	count	clean	run	micro			macro			train_time	pred_time
	min_count	min_count		precision	recall	fscore	precision	recall	fscore
0	3	3	baseline	0.480505	0.480505	0.480505	0.428087	0.426892	0.426428	285.092249	0.577610
1	3	3	model	0.485092	0.485092	0.485092	0.432414	0.431457	0.430959	285.092249	2.223516
2	4	4	baseline	0.478211	0.478211	0.478211	0.427017	0.425819	0.425354	250.629957	1.069185
3	4	4	model	0.479931	0.479931	0.479931	0.428927	0.427299	0.427088	250.629957	1.801056

[3]:

# Emit the LaTeX source of the score table as plain text.
latex_table = score_table.to_latex()
print(latex_table)

\begin{tabular}{lrrlrrrrrrrr}
\toprule
 & count & clean & run & \multicolumn{3}{r}{micro} & \multicolumn{3}{r}{macro} & train_time & pred_time \\
 & min_count & min_count &  & precision & recall & fscore & precision & recall & fscore &  &  \\
\midrule
0 & 3 & 3 & baseline & 0.480505 & 0.480505 & 0.480505 & 0.428087 & 0.426892 & 0.426428 & 285.092249 & 0.577610 \\
1 & 3 & 3 & model & 0.485092 & 0.485092 & 0.485092 & 0.432414 & 0.431457 & 0.430959 & 285.092249 & 2.223516 \\
2 & 4 & 4 & baseline & 0.478211 & 0.478211 & 0.478211 & 0.427017 & 0.425819 & 0.425354 & 250.629957 & 1.069185 \\
3 & 4 & 4 & model & 0.479931 & 0.479931 & 0.479931 & 0.428927 & 0.427299 & 0.427088 & 250.629957 & 1.801056 \\
\bottomrule
\end{tabular}

[ ]: