Expansion - unihan_etl.expansion#

Functions to expand compacted details inside field values.

Notes

re.compile() operations are kept inside the expand functions because:

  1. readability

  2. module-level function bytecode is cached in Python

  3. the most recently used compiled regexes are cached by the re module

unihan_etl.expansion.N_DIACRITICS = 'ńňǹ'#

diacritics from kHanyuPinlu

unihan_etl.expansion.expand_kDefinition(value)[source]#
Return type:

List[str]

Parameters:

value (str) –

class unihan_etl.expansion.kMandarinDict[source]#

Bases: dict

unihan_etl.expansion.expand_kMandarin(value)[source]#
Return type:

kMandarinDict

Parameters:

value (List[str]) –

class unihan_etl.expansion.kTotalStrokesDict[source]#

Bases: dict

unihan_etl.expansion.expand_kTotalStrokes(value)[source]#
Return type:

kTotalStrokesDict

Parameters:

value (List[str]) –

class unihan_etl.expansion.kLocationDict[source]#

Bases: TypedDict

volume: int#
page: int#
character: int#
virtual: int#
unihan_etl.expansion.expand_kHanYu(value)[source]#
Return type:

List[kLocationDict]

Parameters:

value (List[str]) –

unihan_etl.expansion.expand_kIRGHanyuDaZidian(value)[source]#
Return type:

List[kLocationDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kHanyuPinyinPreDict[source]#

Bases: TypedDict

locations: Sequence[Union[str, kLocationDict]]#
readings: List[str]#
class unihan_etl.expansion.kHanyuPinyinDict[source]#

Bases: TypedDict

locations: kLocationDict#
readings: List[str]#
unihan_etl.expansion.expand_kHanyuPinyin(value)[source]#
Return type:

List[kHanyuPinyinDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kXHC1983LocationDict[source]#

Bases: TypedDict

page: int#
character: int#
entry: Optional[int]#
substituted: bool#
class unihan_etl.expansion.kXHC1983Dict[source]#

Bases: TypedDict

locations: kXHC1983LocationDict#
reading: str#
class unihan_etl.expansion.kXHC1983PreDict[source]#

Bases: TypedDict

locations: Union[List[str], kXHC1983LocationDict]#
reading: str#
unihan_etl.expansion.expand_kXHC1983(value)[source]#
Return type:

List[kXHC1983Dict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kCheungBauerDict[source]#

Bases: TypedDict

radical: int#
strokes: int#
cangjie: Optional[str]#
readings: List[str]#
unihan_etl.expansion.expand_kCheungBauer(value)[source]#
Return type:

List[kCheungBauerDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kRSAdobe_Japan1_6Dict[source]#

Bases: dict

type: str#
cid: int#
radical: int#
strokes: int#
unihan_etl.expansion.expand_kRSAdobe_Japan1_6(value)[source]#
Return type:

List[kRSAdobe_Japan1_6Dict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kCihaiTDict[source]#

Bases: TypedDict

page: int#
row: int#
character: int#
unihan_etl.expansion.expand_kCihaiT(value)[source]#
Return type:

List[kCihaiTDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kIICoreDict[source]#

Bases: TypedDict

priority: str#
sources: List[str]#
unihan_etl.expansion.expand_kIICore(value)[source]#
Return type:

List[kIICoreDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kDaeJaweonDict[source]#

Bases: TypedDict

page: int#
character: int#
virtual: int#
unihan_etl.expansion.expand_kDaeJaweon(value)[source]#
Return type:

kDaeJaweonDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRGKangXi(value)[source]#
Return type:

List[kDaeJaweonDict]

Parameters:

value (List[str]) –

unihan_etl.expansion.expand_kIRGDaeJaweon(value)[source]#
Return type:

List[kDaeJaweonDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kFennDict[source]#

Bases: TypedDict

phonetic: str#
frequency: str#
unihan_etl.expansion.expand_kFenn(value)[source]#
Return type:

List[kFennDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kHanyuPinluDict[source]#

Bases: TypedDict

phonetic: str#
frequency: int#
unihan_etl.expansion.expand_kHanyuPinlu(value)[source]#
Return type:

List[kHanyuPinluDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.LocationDict[source]#

Bases: TypedDict

volume: int#
page: int#
character: int#
virtual: int#
class unihan_etl.expansion.kHDZRadBreakDict[source]#

Bases: TypedDict

radical: str#
ucn: str#
location: LocationDict#
unihan_etl.expansion.expand_kHDZRadBreak(value)[source]#
Return type:

kHDZRadBreakDict

Parameters:

value (str) –

class unihan_etl.expansion.kSBGYDict[source]#

Bases: TypedDict

page: int#
character: int#
unihan_etl.expansion.expand_kSBGY(value)[source]#
Return type:

List[kSBGYDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kRSGenericDict[source]#

Bases: TypedDict

radical: int#
strokes: int#
simplified: bool#
unihan_etl.expansion._expand_kRSGeneric(value)[source]#
Return type:

List[kRSGenericDict]

Parameters:

value (List[str]) –

unihan_etl.expansion.expand_kRSUnicode(value)[source]#
Return type:

List[kRSGenericDict]

Parameters:

value (List[str]) –

unihan_etl.expansion.expand_kRSJapanese(value)[source]#
Return type:

List[kRSGenericDict]

Parameters:

value (List[str]) –

unihan_etl.expansion.expand_kRSKangXi(value)[source]#
Return type:

List[kRSGenericDict]

Parameters:

value (List[str]) –

unihan_etl.expansion.expand_kRSKanWa(value)[source]#
Return type:

List[kRSGenericDict]

Parameters:

value (List[str]) –

unihan_etl.expansion.expand_kRSKorean(value)[source]#
Return type:

List[kRSGenericDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.SourceLocationDict[source]#

Bases: TypedDict

source: str#
location: Optional[str]#
unihan_etl.expansion._expand_kIRG_GenericSource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRG_GSource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRG_HSource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRG_JSource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRG_KPSource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRG_KSource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRG_MSource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRG_TSource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRG_USource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

unihan_etl.expansion.expand_kIRG_VSource(value)[source]#
Return type:

SourceLocationDict

Parameters:

value (str) –

class unihan_etl.expansion.kGSRDict[source]#

Bases: TypedDict

set: int#
letter: str#
apostrophe: bool#
unihan_etl.expansion.expand_kGSR(value)[source]#
Return type:

List[kGSRDict]

Parameters:

value (List[str]) –

class unihan_etl.expansion.kCheungBauerIndexDict[source]#

Bases: TypedDict

page: int#
character: int#
unihan_etl.expansion.expand_kCheungBauerIndex(value)[source]#
Return type:

List[Union[str, kCheungBauerIndexDict]]

Parameters:

value (List[str]) –

unihan_etl.expansion.expand_kFennIndex(value)[source]#
Return type:

List[Union[str, kCheungBauerIndexDict]]

Parameters:

value (List[str]) –

unihan_etl.expansion.expand_field(field, fvalue)[source]#

Return the structured value of the information in a UNIHAN field.

Return type:

Any

Parameters:
  • field (str) – field name

  • fvalue (str) – value of field

Returns:

expanded field information per UNIHAN’s documentation

Return type:

list or dict