"""Compare output of divvun-checker with the output of divvun-runtime""" import json import sys from dataclasses import asdict, dataclass from pathlib import Path from typing import Any import yaml from giellaltgramtools.gramchecker import check_paragraphs_in_parallel from giellaltgramtools.yaml_gramchecker import YamlGramChecker @dataclass class CheckerResult: form: str beg: int end: int err: str rep: list[str] @classmethod def from_list(cls, data: list[Any]) -> "CheckerResult": """Create CheckerResult from list representation.""" return cls( form=data[0], beg=data[1], end=data[2], err=data[3], rep=data[5], ) def __eq__(self, other: object) -> bool: """Compare CheckerResults using the lower cased err attributes. """ if not isinstance(other, CheckerResult): return NotImplemented return ( self.form == other.form and self.beg == other.beg and self.end == other.end and self.err.lower() == other.err.lower() and self.rep == other.rep ) def __hash__(self): """Define hash to match the custom equality.""" return hash((self.form, self.beg, self.end, tuple(self.rep))) def parse_checker_output(output: str) -> list[list[CheckerResult]]: """Parse divvun-checker format output (newline-separated JSON objects).""" lines = output.strip().splitlines() if not lines: return [] results = json.loads(f"[{','.join(lines)}]") return [ [CheckerResult.from_list(err) for err in result.get("errs", [])] for result in results ] def get_gramcheck_bundles(directory: Path) -> tuple[Path, Path]: """Find .zcheck and .drb files in parent directory.""" zcheck_files = list(directory.parent.glob("*.zcheck")) if not zcheck_files: print("Warning: No .zcheck file found in parent directory", file=sys.stderr) sys.exit(1) zcheck = zcheck_files[0] drb_files = list(directory.parent.glob("*.drb")) if not drb_files: print("Warning: No .drb file found in parent directory", file=sys.stderr) sys.exit(1) drb = drb_files[0] return zcheck, drb def get_paragraphs(spec_file: Path, yaml_file: Path) -> list[str]: """Extract paragraphs from YAML test file.""" gramchecker = YamlGramChecker(config={"spec": spec_file, "test_file": None}) yaml_content = yaml.load(yaml_file.read_text(), Loader=yaml.FullLoader) paragraphs: list[str] = sorted( { gramchecker.paragraph_to_testdata(gramchecker.make_error_markup(text))[0] for text in yaml_content.get("Tests", []) if text.strip() } ) return paragraphs def build_checker_command(zcheck: Path, variant: str|None) -> str: """Build divvun-checker command.""" cmd = f"divvun-checker --archive {zcheck}" if variant is not None: cmd += f" --variant {variant}" return cmd def build_runtime_command(drb: Path, variant: str|None) -> str: """Build divvun-runtime command.""" cmd = f"divvun-runtime run -p {drb}" if variant is not None: # Map variant names if needed (e.g., smegram-dev -> sme-gram) pipeline = "sme-gram" if variant == "smegram" else variant cmd += f" -P {pipeline}" return cmd @dataclass class ComparisonStats: """Statistics for comparison results.""" total_paragraphs: int = 0 total_errors: int = 0 exact_matches: int = 0 typo_suggestion_order_diffs: int = 0 extra_parenthesis_errors: int = 0 other_mismatches: int = 0 def is_typo_suggestion_order_difference( checker_err: CheckerResult, runtime_err: CheckerResult ) -> bool: """Check if the only difference is the order of typo suggestions. Args: checker_err: Error from divvun-checker runtime_err: Error from divvun-runtime Returns: True if errors match except for suggestion order """ # Must be same error type if checker_err.err != runtime_err.err: return False # Must be typo errors if checker_err.err != "typo": return False # Must have same form and position if (checker_err.form != runtime_err.form or checker_err.beg != runtime_err.beg or checker_err.end != runtime_err.end): return False return True def has_extra_parenthesis_errors( checker_errors: list[CheckerResult], runtime_errors: list[CheckerResult] ) -> tuple[bool, list[CheckerResult], list[CheckerResult]]: """Check if runtime has extra parenthesis-missing-space errors that checker doesn't. This is a known difference where divvun-runtime flags parentheses without spaces but divvun-checker doesn't. Args: checker_errors: Errors from divvun-checker runtime_errors: Errors from divvun-runtime Returns: Tuple of (has_extra_paren_errors, filtered_checker_errors, filtered_runtime_errors) """ # Find parenthesis-missing-space errors in runtime paren_errors = [e for e in runtime_errors if e.err == "parenthesis-missing-space"] if not paren_errors: return False, checker_errors, runtime_errors # Filter out parenthesis errors from runtime for comparison runtime_without_paren = [e for e in runtime_errors if e.err != "parenthesis-missing-space"] # Check if the remaining errors match checker errors if len(checker_errors) == len(runtime_without_paren): return True, checker_errors, runtime_without_paren return False, checker_errors, runtime_errors def compare_results( paragraph: str, checker_errors: list[CheckerResult], runtime_errors: list[CheckerResult], stats: ComparisonStats, verbose: bool = True, show_known: bool = False ) -> bool: """Compare results from checker and runtime. Returns True if they match exactly. Args: paragraph: The text being checked checker_errors: Errors from divvun-checker runtime_errors: Errors from divvun-runtime stats: Statistics object to update verbose: Whether to print detailed comparison show_known: Whether to show known differences in output Returns: True if results match exactly or have only known differences, False otherwise """ stats.total_paragraphs += 1 stats.total_errors += len(checker_errors) # Check for extra parenthesis errors (known difference) has_paren_diff, filtered_checker, filtered_runtime = has_extra_parenthesis_errors( checker_errors, runtime_errors ) if has_paren_diff: # Runtime has extra parenthesis errors, but other errors might still match stats.extra_parenthesis_errors += 1 if verbose and show_known: paren_errs = [e for e in runtime_errors if e.err == "parenthesis-missing-space"] print(f"\nKnown difference (extra parenthesis errors) for: {paragraph}") print(f"Runtime found {len(paren_errs)} extra parenthesis error(s):") for pe in paren_errs: print(f" - '{pe.form}' at {pe.beg}-{pe.end}") # Continue comparing the filtered errors checker_errors = filtered_checker runtime_errors = filtered_runtime if len(checker_errors) != len(runtime_errors): stats.other_mismatches += 1 if verbose: print(f"\nMismatch for: {paragraph}") print(f"Different number of errors: checker={len(checker_errors)}, runtime={len(runtime_errors)}") print("divvun-checker errors:") print(json.dumps([asdict(e) for e in checker_errors], indent=2, ensure_ascii=False)) print("divvun-runtime errors:") print(json.dumps([asdict(e) for e in runtime_errors], indent=2, ensure_ascii=False)) return False has_typo_order_diff = False has_unknown_difference = False for checker_err, runtime_err in zip(checker_errors, runtime_errors, strict=True): if checker_err == runtime_err: # Exact match continue elif is_typo_suggestion_order_difference(checker_err, runtime_err): # Known difference: typo suggestions in different order has_typo_order_diff = True if verbose and show_known: print(f"\nKnown difference (typo suggestion order) for: {paragraph}") print(f"Form: {checker_err.form} at {checker_err.beg}-{checker_err.end}") print(f"Checker suggestions: {checker_err.rep[:5]}...") print(f"Runtime suggestions: {runtime_err.rep[:5]}...") else: # Unknown difference has_unknown_difference = True if verbose: print(f"\nUnknown mismatch for: {paragraph}") print("divvun-checker error:") print(json.dumps(asdict(checker_err), indent=2, ensure_ascii=False)) print("divvun-runtime error:") print(json.dumps(asdict(runtime_err), indent=2, ensure_ascii=False)) if has_unknown_difference: stats.other_mismatches += 1 return False elif has_typo_order_diff: stats.typo_suggestion_order_diffs += 1 return True # Consider it a match for overall statistics elif has_paren_diff: # Already counted above, just return True return True else: stats.exact_matches += 1 return True def engine_comparator(directory_name: str, variant: str|None = None, show_known: bool = False): """Compare divvun-checker and divvun-runtime outputs for all YAML files in directory. Args: directory_name: Path to directory containing YAML test files variant: Optional variant/pipeline name to use show_known: Whether to show known differences in output """ directory = Path(directory_name) zcheck, drb = get_gramcheck_bundles(directory) checker_cmd = build_checker_command(zcheck, variant) runtime_cmd = build_runtime_command(drb, variant) print(f"Checker command: {checker_cmd}") print(f"Runtime command: {runtime_cmd}") print() stats = ComparisonStats() files_processed = 0 for yaml_file in directory.glob("*.yaml"): print(f"Processing {yaml_file.name}...") files_processed += 1 # Get test paragraphs paragraphs = get_paragraphs(zcheck, yaml_file) # Run both checkers in parallel checker_output = check_paragraphs_in_parallel(checker_cmd, paragraphs) runtime_output = check_paragraphs_in_parallel(runtime_cmd, paragraphs) # Parse outputs checker_results = parse_checker_output(checker_output) runtime_results = parse_checker_output(runtime_output) # Check if result counts match if len(checker_results) != len(paragraphs): print(f"ERROR: Checker returned {len(checker_results)} results for {len(paragraphs)} paragraphs", file=sys.stderr) continue if len(runtime_results) != len(paragraphs): print(f"ERROR: Runtime returned {len(runtime_results)} results for {len(paragraphs)} paragraphs", file=sys.stderr) continue # Compare results file_mismatches = 0 file_known_diffs = 0 for paragraph, checker_errs, runtime_errs in zip( paragraphs, checker_results, runtime_results, strict=True ): errors_before = stats.other_mismatches known_before = stats.typo_suggestion_order_diffs # Compare with verbose=False first matches = compare_results(paragraph, checker_errs, runtime_errs, stats, verbose=False, show_known=show_known) # Check what kind of difference we found if not matches: file_mismatches += 1 # Show details for first few unknown mismatches if file_mismatches <= 3 and stats.other_mismatches > errors_before: compare_results(paragraph, checker_errs, runtime_errs, stats, verbose=True, show_known=show_known) elif stats.typo_suggestion_order_diffs > known_before: file_known_diffs += 1 # Report file results exact_matches = len(paragraphs) - file_mismatches - file_known_diffs if file_mismatches == 0: if file_known_diffs > 0: print(f"✓ {yaml_file.name}: {exact_matches} exact matches, {file_known_diffs} known differences (typo order)") else: print(f"✓ {yaml_file.name}: All {len(paragraphs)} paragraphs match exactly") else: print(f"✗ {yaml_file.name}: {file_mismatches} unknown mismatches, {file_known_diffs} known differences") print() print("=" * 70) print("Summary:") print(f" Files processed: {files_processed}") print(f" Total paragraphs checked: {stats.total_paragraphs}") print(f" Total errors found: {stats.total_errors}") print(f" Exact matches: {stats.exact_matches}") print(" Known differences:") print(f" - Typo suggestion order: {stats.typo_suggestion_order_diffs}") print(f" - Extra parenthesis errors: {stats.extra_parenthesis_errors}") print(f" Unknown mismatches: {stats.other_mismatches}") print() total_known = stats.typo_suggestion_order_diffs + stats.extra_parenthesis_errors if stats.other_mismatches == 0: if total_known > 0: print(f" ✓ Success! Only known differences found ({total_known} total)") else: print(" ✓ Perfect! All outputs match exactly!") else: print(f" ✗ {stats.other_mismatches} unknown mismatches need investigation")