[2]:
from minimel import evaluate
from minimel.experiment import get_dir_params
import pandas as pd
import pathlib

# Gold-standard annotations that every prediction file is scored against.
fgold = pathlib.Path("../data/mewsli-en-small.tsv")

# Collect every prediction file below the experiment directory, in sorted
# order so repeated runs list them identically.
experiment_root = pathlib.Path("../data/wiki/simplewiki-20211120/")
fpreds = sorted(experiment_root.rglob("run*.tsv"))

# Score all prediction files against the gold file in a single call.
scores = evaluate(fgold, *fpreds)

# Build one record per scored prediction path. Every path component that
# contains "__" contributes one entry: the text before the first "__" is the
# key, and the value is the dict built from get_dir_params() on that component.
dir_params = pd.DataFrame.from_records([
    {
        part.split("__")[0]: dict(get_dir_params(pathlib.Path(part)))
        for part in pred_path.parts
        if "__" in part
    }
    for pred_path in scores.index
])
# Expand each column of dicts into its own frame and concatenate them side by
# side; the dict keys passed to pd.concat become the outer column level.
dir_params = pd.concat(
    {prefix: pd.DataFrame.from_records(cells) for prefix, cells in dir_params.items()},
    axis=1,
)
# Remove the file extension from the run name. regex=False makes ".tsv" a
# literal string on every pandas version; with the pre-2.0 default
# (regex=True) the "." would match any character.
dir_params["run"] = dir_params["run"].str.replace(".tsv", "", regex=False)

def _last_logged_seconds(log_path):
    """Return the last whitespace-separated token of the final line of ``log_path`` as a float.

    The file is read inside a ``with`` block so the handle is closed as soon
    as the lines have been read, instead of being left to the garbage
    collector.

    Raises:
        IndexError: if the file has no lines or its final line is blank.
        ValueError: if the final token is not a valid float.
    """
    with open(log_path) as log_file:
        final_line = log_file.readlines()[-1]
    return float(final_line.split()[-1])


# Each prediction file has a "time.log" two directory levels above it.
# NOTE(review): the assignments below assume the rows of `scores` are in the
# same order as `fpreds` -- confirm against evaluate().
train_logs = [f.parent.parent / "time.log" for f in fpreds]
scores["train_time"] = [_last_logged_seconds(f) for f in train_logs]

# The prediction log sits next to the prediction file: drop the "run___"
# prefix from the file name and swap the ".tsv" suffix for "-time.log".
pred_logs = [f.parent / f.name.replace("run___", "").replace(".tsv", "-time.log") for f in fpreds]
scores["pred_time"] = [_last_logged_seconds(f) for f in pred_logs]

def _is_time_column(label):
    """Return True if any level of a column label contains the substring "time".

    ``label`` is a plain label for flat columns and a tuple for hierarchical
    columns. A bare ``"time" in label`` on a tuple tests element membership
    rather than substring containment, so each level is checked as a string.
    """
    levels = label if isinstance(label, tuple) else (label,)
    return any("time" in str(level) for level in levels)


# Put the directory parameters and the scores side by side, aligned on a
# fresh 0..n-1 index.
score_table = dir_params.join(scores.reset_index(drop=True))

# Columns holding a single distinct value carry no information for the
# comparison and are dropped, except timing columns, which are always kept.
constant_columns = [
    label
    for label, values in score_table.items()
    if len(set(values)) == 1 and not _is_time_column(label)
]
score_table = score_table.drop(columns=constant_columns)
score_table
Evaluating: 100%|██████████| 4/4 [00:00<00:00,  6.28it/s]
[2]:
count clean run micro macro train_time pred_time
min_count min_count precision recall fscore precision recall fscore
0 3 3 baseline 0.480505 0.480505 0.480505 0.428087 0.426892 0.426428 285.092249 0.577610
1 3 3 model 0.485092 0.485092 0.485092 0.432414 0.431457 0.430959 285.092249 2.223516
2 4 4 baseline 0.478211 0.478211 0.478211 0.427017 0.425819 0.425354 250.629957 1.069185
3 4 4 model 0.479931 0.479931 0.479931 0.428927 0.427299 0.427088 250.629957 1.801056
[3]:
# Render the score table as LaTeX source and print it as plain text so it can
# be copied into a document.
latex_source = score_table.to_latex()
print(latex_source)
\begin{tabular}{lrrlrrrrrrrr}
\toprule
 & count & clean & run & \multicolumn{3}{r}{micro} & \multicolumn{3}{r}{macro} & train_time & pred_time \\
 & min_count & min_count &  & precision & recall & fscore & precision & recall & fscore &  &  \\
\midrule
0 & 3 & 3 & baseline & 0.480505 & 0.480505 & 0.480505 & 0.428087 & 0.426892 & 0.426428 & 285.092249 & 0.577610 \\
1 & 3 & 3 & model & 0.485092 & 0.485092 & 0.485092 & 0.432414 & 0.431457 & 0.430959 & 285.092249 & 2.223516 \\
2 & 4 & 4 & baseline & 0.478211 & 0.478211 & 0.478211 & 0.427017 & 0.425819 & 0.425354 & 250.629957 & 1.069185 \\
3 & 4 & 4 & model & 0.479931 & 0.479931 & 0.479931 & 0.428927 & 0.427299 & 0.427088 & 250.629957 & 1.801056 \\
\bottomrule
\end{tabular}

[ ]: