Constants - unihan_etl.constants#

unihan_etl.constants.CUSTOM_DELIMITED_FIELDS = ('kDefinition', 'kDaeJaweon', 'kHDZRadBreak', 'kIRG_GSource', 'kIRG_HSource', 'kIRG_JSource', 'kIRG_KPSource', 'kIRG_KSource', 'kIRG_MSource', 'kIRG_TSource', 'kIRG_USource', 'kIRG_VSource')#

Fields with multiple values via custom delimiters

unihan_etl.constants.SPACE_DELIMITED_DICT_FIELDS = ('kHanYu', 'kXHC1983', 'kMandarin', 'kTotalStrokes')#

Fields with multiple values UNIHAN delimits by spaces -> dict

unihan_etl.constants.SPACE_DELIMITED_LIST_FIELDS = ('kAccountingNumeric', 'kCantonese', 'kCCCII', 'kCheungBauer', 'kCheungBauerIndex', 'kCihaiT', 'kCowles', 'kFenn', 'kFennIndex', 'kFourCornerCode', 'kGSR', 'kHangul', 'kHanyuPinlu', 'kHanyuPinyin', 'kHKGlyph', 'kIBMJapan', 'kIICore', 'kIRGDaeJaweon', 'kIRGDaiKanwaZiten', 'kIRGHanyuDaZidian', 'kIRGKangXi', 'kJa', 'kJapaneseKun', 'kJapaneseOn', 'kJinmeiyoKanji', 'kJis0', 'kJIS0213', 'kJis1', 'kJoyoKanji', 'kKangXi', 'kKarlgren', 'kKorean', 'kKoreanEducationHanja', 'kKoreanName', 'kKPS0', 'kKPS1', 'kKSC0', 'kKSC1', 'kLua', 'kMainlandTelegraph', 'kMatthews', 'kMeyerWempe', 'kMorohashi', 'kNelson', 'kOtherNumeric', 'kPhonetic', 'kPrimaryNumeric', 'kRSAdobe_Japan1_6', 'kRSJapanese', 'kRSKangXi', 'kRSKanWa', 'kRSKorean', 'kRSUnicode', 'kSBGY', 'kSemanticVariant', 'kSimplifiedVariant', 'kSpecializedSemanticVariant', 'kTaiwanTelegraph', 'kTang', 'kTGH', 'kTraditionalVariant', 'kVietnamese', 'kXerox', 'kZVariant')#

Fields with multiple values UNIHAN delimits by spaces -> list

unihan_etl.constants.SPACE_DELIMITED_FIELDS = ('kAccountingNumeric', 'kCantonese', 'kCCCII', 'kCheungBauer', 'kCheungBauerIndex', 'kCihaiT', 'kCowles', 'kFenn', 'kFennIndex', 'kFourCornerCode', 'kGSR', 'kHangul', 'kHanyuPinlu', 'kHanyuPinyin', 'kHKGlyph', 'kIBMJapan', 'kIICore', 'kIRGDaeJaweon', 'kIRGDaiKanwaZiten', 'kIRGHanyuDaZidian', 'kIRGKangXi', 'kJa', 'kJapaneseKun', 'kJapaneseOn', 'kJinmeiyoKanji', 'kJis0', 'kJIS0213', 'kJis1', 'kJoyoKanji', 'kKangXi', 'kKarlgren', 'kKorean', 'kKoreanEducationHanja', 'kKoreanName', 'kKPS0', 'kKPS1', 'kKSC0', 'kKSC1', 'kLua', 'kMainlandTelegraph', 'kMatthews', 'kMeyerWempe', 'kMorohashi', 'kNelson', 'kOtherNumeric', 'kPhonetic', 'kPrimaryNumeric', 'kRSAdobe_Japan1_6', 'kRSJapanese', 'kRSKangXi', 'kRSKanWa', 'kRSKorean', 'kRSUnicode', 'kSBGY', 'kSemanticVariant', 'kSimplifiedVariant', 'kSpecializedSemanticVariant', 'kTaiwanTelegraph', 'kTang', 'kTGH', 'kTraditionalVariant', 'kVietnamese', 'kXerox', 'kZVariant', 'kHanYu', 'kXHC1983', 'kMandarin', 'kTotalStrokes')#

Any space-delimited field, regardless of expanded form

unihan_etl.constants.INDEX_FIELDS: Tuple[str, ...] = ('ucn', 'char')#

Default index fields for UNIHAN CSVs. You probably want these.

unihan_etl.constants.WORK_DIR = PosixPath('/home/runner/.cache/unihan_etl/downloads')#

Directory to use for processing intermediate files.

unihan_etl.constants.UNIHAN_FILES = ['Unihan_DictionaryIndices.txt', 'Unihan_DictionaryLikeData.txt', 'Unihan_IRGSources.txt', 'Unihan_NumericValues.txt', 'Unihan_OtherMappings.txt', 'Unihan_RadicalStrokeCounts.txt', 'Unihan_Readings.txt', 'Unihan_Variants.txt']#

Default Unihan Files

unihan_etl.constants.UNIHAN_URL = 'http://www.unicode.org/Public/UNIDATA/Unihan.zip'#

URI of Unihan.zip data.

unihan_etl.constants.DESTINATION_DIR = PosixPath('/home/runner/.local/share/unihan_etl')#

Directory to output the built CSV file to.

unihan_etl.constants.UNIHAN_ZIP_PATH = PosixPath('/home/runner/.cache/unihan_etl/downloads/Unihan.zip')#

File path to download the Zip file to.

unihan_etl.constants.UNIHAN_FIELDS: Tuple[str, ...] = ('kAccountingNumeric', 'kBigFive', 'kCCCII', 'kCNS1986', 'kCNS1992', 'kCangjie', 'kCantonese', 'kCheungBauer', 'kCheungBauerIndex', 'kCihaiT', 'kCompatibilityVariant', 'kCowles', 'kDaeJaweon', 'kDefinition', 'kEACC', 'kFenn', 'kFennIndex', 'kFourCornerCode', 'kFrequency', 'kGB0', 'kGB1', 'kGB3', 'kGB5', 'kGB7', 'kGB8', 'kGSR', 'kGradeLevel', 'kHDZRadBreak', 'kHKGlyph', 'kHKSCS', 'kHanYu', 'kHangul', 'kHanyuPinlu', 'kHanyuPinyin', 'kIBMJapan', 'kIICore', 'kIRGDaeJaweon', 'kIRGDaiKanwaZiten', 'kIRGHanyuDaZidian', 'kIRGKangXi', 'kIRG_GSource', 'kIRG_HSource', 'kIRG_JSource', 'kIRG_KPSource', 'kIRG_KSource', 'kIRG_MSource', 'kIRG_TSource', 'kIRG_USource', 'kIRG_VSource', 'kJIS0213', 'kJa', 'kJapaneseKun', 'kJapaneseOn', 'kJinmeiyoKanji', 'kJis0', 'kJis1', 'kJoyoKanji', 'kKPS0', 'kKPS1', 'kKSC0', 'kKSC1', 'kKangXi', 'kKarlgren', 'kKorean', 'kKoreanEducationHanja', 'kKoreanName', 'kLau', 'kMainlandTelegraph', 'kMandarin', 'kMatthews', 'kMeyerWempe', 'kMorohashi', 'kNelson', 'kOtherNumeric', 'kPhonetic', 'kPrimaryNumeric', 'kPseudoGB1', 'kRSAdobe_Japan1_6', 'kRSJapanese', 'kRSKanWa', 'kRSKangXi', 'kRSKorean', 'kRSUnicode', 'kSBGY', 'kSemanticVariant', 'kSimplifiedVariant', 'kSpecializedSemanticVariant', 'kTGH', 'kTaiwanTelegraph', 'kTang', 'kTotalStrokes', 'kTraditionalVariant', 'kVietnamese', 'kXHC1983', 'kXerox', 'kZVariant')#

Default Unihan fields

unihan_etl.constants.ALLOWED_EXPORT_TYPES = ['json', 'csv', 'yaml']#

Allowed export types