Module dphon.console
Expand source code
from typing import Tuple, List
from rich.console import Console
from rich.highlighter import RegexHighlighter
from rich.theme import Theme
from .match import Match
from .g2p import GraphemesToPhonemes
# Default color scheme for highlighting matches. Each key is a style name that
# MatchHighlighter emits as a Rich markup tag, e.g. "[variant]...[/variant]".
DEFAULT_THEME = Theme({
"context": "dim",  # document text shown on either side of a match
"variant": "blue",  # tokens that are graphic variants of each other
"insertion": "green",  # token present in one sequence but not the other
"mismatch": "red"  # differing tokens in the same position
})
# Consoles for rendering output. Both share DEFAULT_THEME so the style names
# above resolve in either one; `console` is created with soft wrapping enabled
# and `err_console` is created with stderr=True for diagnostics.
console = Console(theme=DEFAULT_THEME, soft_wrap=True)
err_console = Console(theme=DEFAULT_THEME, stderr=True)
class MatchHighlighter(RegexHighlighter):
    """Highlighter that adds Rich markup to matches for console rendering.

    Instance attributes (all set in ``__init__``):

    - ``context``: how far to extend the display on each side of a match,
      measured in index positions of the match's document (0 = no context).
    - ``gap_char``: the character that marks a gap in an aligned sequence.
    - ``g2p``: object whose ``are_graphic_variants`` decides whether two
      tokens should be highlighted as graphic variants.
    """

    context: int
    gap_char: str
    g2p: GraphemesToPhonemes

    def __init__(self, g2p: GraphemesToPhonemes, context: int = 0, gap_char: str = "-") -> None:
        """Create a new highlighter with optional context for each match.

        Raises:
            ValueError: if ``context`` is negative.
        """
        # zero is valid (it simply disables context); only negatives are errors
        if context < 0:
            raise ValueError(
                f"{type(self).__name__} context must be greater than or equal to 0")

        # store parameters
        self.context = context
        self.gap_char = gap_char
        self.g2p = g2p
        super().__init__()

    def format_match(self, match: Match) -> Tuple[str, str]:
        """Return match sequences as Rich format strings, with optional context.

        Adds markup for highlighting insertions, mismatches, etc. If context is
        set, also adds highlighted context to either end of the match.

        Returns:
            A ``(u, v)`` pair of markup strings, one per side of the match.
        """
        su, sv = self._mark(match)
        if self.context > 0:
            cul, cur, cvl, cvr = self._add_context(match)
            su = cul + su + cur
            sv = cvl + sv + cvr
        return su, sv

    def _mark(self, match: Match) -> Tuple[str, str]:
        """Mark up the match for colorization with a theme.

        - Adds markup for insertions (tokens in one sequence but not another).
        - Adds markup for mismatches (differing tokens in the same position).
        - Adds markup for graphic variants (mismatches with same phonemes).

        If the match has no alignment (``au`` or ``av`` is empty), the plain
        text of each side is returned without any markup.
        """
        au, av = match.au, match.av

        # if no alignment, just convert to strings because we can't highlight
        if not au or not av:
            return match.utxt.text, match.vtxt.text

        # NOTE(review): the text is embedded in Rich markup unescaped, so a
        # literal "[" in the source text may be parsed as a tag - confirm
        # whether inputs can contain brackets.
        # O(N) implementation: step through each sequence adding markup
        # TODO convert to a DFA so there's less markup repetition
        gap = self.gap_char
        su: List[str] = []
        sv: List[str] = []
        # u_ptr / v_ptr index into the unaligned texts; they only advance for
        # alignment columns that actually consume a token on that side
        u_ptr = 0
        v_ptr = 0

        # NOTE(review): the column count comes from len(match); presumably this
        # equals the aligned length - confirm against Match.__len__.
        for i in range(len(match)):
            cu, cv = au[i], av[i]

            # gap in u: v has an extra token. Only alphanumeric insertions are
            # highlighted, but punctuation must still consume a v token only;
            # otherwise u_ptr drifts ahead of the alignment.
            if cu == gap and cv != gap:
                su.append(cu)
                sv.append(f"[insertion]{cv}[/insertion]" if cv.isalnum() else cv)
                v_ptr += 1
                continue

            # gap in v: u has an extra token (mirror of the case above)
            if cv == gap and cu != gap:
                su.append(f"[insertion]{cu}[/insertion]" if cu.isalnum() else cu)
                sv.append(cv)
                u_ptr += 1
                continue

            # variants (both u and v)
            if self.g2p.are_graphic_variants(match.utxt[u_ptr], match.vtxt[v_ptr]):
                su.append(f"[variant]{cu}[/variant]")
                sv.append(f"[variant]{cv}[/variant]")
            # mismatch (both u and v) - only highlight if alphanumeric
            elif cu != cv and cu.isalnum() and cv.isalnum():
                su.append(f"[mismatch]{cu}[/mismatch]")
                sv.append(f"[mismatch]{cv}[/mismatch]")
            # equality (or non-alphanumeric difference); nothing to highlight
            else:
                su.append(cu)
                sv.append(cv)
            u_ptr += 1
            v_ptr += 1

        return "".join(su), "".join(sv)

    def _add_context(self, match: Match) -> Tuple[str, str, str, str]:
        """Add context to either side of the match sequences.

        Context coloration can be changed by the default theme; a dim appearance
        is used in terminals.

        Returns:
            ``(u_left, u_right, v_left, v_right)`` context strings, each wrapped
            in ``[context]`` markup.
        """
        utxt, vtxt = match.utxt, match.vtxt
        u, v = utxt.doc, vtxt.doc
        # clamp the left edge at 0: a negative slice start would be interpreted
        # relative to the end of the document and drop the left context
        u_left = max(utxt.start - self.context, 0)
        v_left = max(vtxt.start - self.context, 0)
        cul = f"[context]{u[u_left:utxt.start]}[/context]"
        cur = f"[context]{u[utxt.end:utxt.end+self.context]}[/context]"
        cvl = f"[context]{v[v_left:vtxt.start]}[/context]"
        cvr = f"[context]{v[vtxt.end:vtxt.end+self.context]}[/context]"
        return (cul, cur, cvl, cvr)

    def transcription(self, match: Match) -> Tuple[str, str]:
        """Get the phonemic transcription for the match for display.

        Each side's syllables are joined with single spaces and prefixed
        with ``*``.
        """
        return (
            "*" + " ".join(match.utxt._.syllables),
            "*" + " ".join(match.vtxt._.syllables)
        )
Classes
class MatchHighlighter (g2p: GraphemesToPhonemes, context: int = 0, gap_char: str = '-')
-
Highlighter that adds Rich markup to matches for console rendering.
Create a new highlighter with optional context for each match.
Expand source code
class MatchHighlighter(RegexHighlighter):
    """Highlighter that adds Rich markup to matches for console rendering.

    Instance attributes (all set in ``__init__``):

    - ``context``: how far to extend the display on each side of a match,
      measured in index positions of the match's document (0 = no context).
    - ``gap_char``: the character that marks a gap in an aligned sequence.
    - ``g2p``: object whose ``are_graphic_variants`` decides whether two
      tokens should be highlighted as graphic variants.
    """

    context: int
    gap_char: str
    g2p: GraphemesToPhonemes

    def __init__(self, g2p: GraphemesToPhonemes, context: int = 0, gap_char: str = "-") -> None:
        """Create a new highlighter with optional context for each match.

        Raises:
            ValueError: if ``context`` is negative.
        """
        # zero is valid (it simply disables context); only negatives are errors
        if context < 0:
            raise ValueError(
                f"{type(self).__name__} context must be greater than or equal to 0")

        # store parameters
        self.context = context
        self.gap_char = gap_char
        self.g2p = g2p
        super().__init__()

    def format_match(self, match: Match) -> Tuple[str, str]:
        """Return match sequences as Rich format strings, with optional context.

        Adds markup for highlighting insertions, mismatches, etc. If context is
        set, also adds highlighted context to either end of the match.

        Returns:
            A ``(u, v)`` pair of markup strings, one per side of the match.
        """
        su, sv = self._mark(match)
        if self.context > 0:
            cul, cur, cvl, cvr = self._add_context(match)
            su = cul + su + cur
            sv = cvl + sv + cvr
        return su, sv

    def _mark(self, match: Match) -> Tuple[str, str]:
        """Mark up the match for colorization with a theme.

        - Adds markup for insertions (tokens in one sequence but not another).
        - Adds markup for mismatches (differing tokens in the same position).
        - Adds markup for graphic variants (mismatches with same phonemes).

        If the match has no alignment (``au`` or ``av`` is empty), the plain
        text of each side is returned without any markup.
        """
        au, av = match.au, match.av

        # if no alignment, just convert to strings because we can't highlight
        if not au or not av:
            return match.utxt.text, match.vtxt.text

        # NOTE(review): the text is embedded in Rich markup unescaped, so a
        # literal "[" in the source text may be parsed as a tag - confirm
        # whether inputs can contain brackets.
        # O(N) implementation: step through each sequence adding markup
        # TODO convert to a DFA so there's less markup repetition
        gap = self.gap_char
        su: List[str] = []
        sv: List[str] = []
        # u_ptr / v_ptr index into the unaligned texts; they only advance for
        # alignment columns that actually consume a token on that side
        u_ptr = 0
        v_ptr = 0

        # NOTE(review): the column count comes from len(match); presumably this
        # equals the aligned length - confirm against Match.__len__.
        for i in range(len(match)):
            cu, cv = au[i], av[i]

            # gap in u: v has an extra token. Only alphanumeric insertions are
            # highlighted, but punctuation must still consume a v token only;
            # otherwise u_ptr drifts ahead of the alignment.
            if cu == gap and cv != gap:
                su.append(cu)
                sv.append(f"[insertion]{cv}[/insertion]" if cv.isalnum() else cv)
                v_ptr += 1
                continue

            # gap in v: u has an extra token (mirror of the case above)
            if cv == gap and cu != gap:
                su.append(f"[insertion]{cu}[/insertion]" if cu.isalnum() else cu)
                sv.append(cv)
                u_ptr += 1
                continue

            # variants (both u and v)
            if self.g2p.are_graphic_variants(match.utxt[u_ptr], match.vtxt[v_ptr]):
                su.append(f"[variant]{cu}[/variant]")
                sv.append(f"[variant]{cv}[/variant]")
            # mismatch (both u and v) - only highlight if alphanumeric
            elif cu != cv and cu.isalnum() and cv.isalnum():
                su.append(f"[mismatch]{cu}[/mismatch]")
                sv.append(f"[mismatch]{cv}[/mismatch]")
            # equality (or non-alphanumeric difference); nothing to highlight
            else:
                su.append(cu)
                sv.append(cv)
            u_ptr += 1
            v_ptr += 1

        return "".join(su), "".join(sv)

    def _add_context(self, match: Match) -> Tuple[str, str, str, str]:
        """Add context to either side of the match sequences.

        Context coloration can be changed by the default theme; a dim appearance
        is used in terminals.

        Returns:
            ``(u_left, u_right, v_left, v_right)`` context strings, each wrapped
            in ``[context]`` markup.
        """
        utxt, vtxt = match.utxt, match.vtxt
        u, v = utxt.doc, vtxt.doc
        # clamp the left edge at 0: a negative slice start would be interpreted
        # relative to the end of the document and drop the left context
        u_left = max(utxt.start - self.context, 0)
        v_left = max(vtxt.start - self.context, 0)
        cul = f"[context]{u[u_left:utxt.start]}[/context]"
        cur = f"[context]{u[utxt.end:utxt.end+self.context]}[/context]"
        cvl = f"[context]{v[v_left:vtxt.start]}[/context]"
        cvr = f"[context]{v[vtxt.end:vtxt.end+self.context]}[/context]"
        return (cul, cur, cvl, cvr)

    def transcription(self, match: Match) -> Tuple[str, str]:
        """Get the phonemic transcription for the match for display.

        Each side's syllables are joined with single spaces and prefixed
        with ``*``.
        """
        return (
            "*" + " ".join(match.utxt._.syllables),
            "*" + " ".join(match.vtxt._.syllables)
        )
Ancestors
- rich.highlighter.RegexHighlighter
- rich.highlighter.Highlighter
- abc.ABC
Class variables
var context : int
var g2p : GraphemesToPhonemes
var gap_char : str
Methods
def format_match(self, match: Match) ‑> Tuple[str, str]
-
Return match sequences as Rich format strings, with optional context.
Adds markup for highlighting insertions, mismatches, etc. If context is set, also adds highlighted context to either end of the match.
Expand source code
def format_match(self, match: Match) -> Tuple[str, str]:
    """Render both sides of a match as Rich markup strings.

    The aligned sequences are marked up first (insertions, mismatches,
    variants). When the highlighter was configured with a positive context,
    the surrounding context markup is attached before and after each side.
    """
    marked_u, marked_v = self._mark(match)

    # no context requested: the marked-up sequences are the final result
    if self.context <= 0:
        return marked_u, marked_v

    left_u, right_u, left_v, right_v = self._add_context(match)
    return left_u + marked_u + right_u, left_v + marked_v + right_v
def transcription(self, match: Match) ‑> Tuple[str, str]
-
Get the phonemic transcription for the match for display.
Expand source code
def transcription(self, match: Match) -> Tuple[str, str]:
    """Build the display form of each side's phonemic transcription.

    The syllables of each side of the match are joined with single spaces
    and the result is prefixed with ``*``.
    """
    u_syllables = " ".join(match.utxt._.syllables)
    v_syllables = " ".join(match.vtxt._.syllables)
    return f"*{u_syllables}", f"*{v_syllables}"