<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE kanjidic2 [
<!-- Version 1.3
  This is the DTD of the XML-format kanji file combining information from
  the KANJIDIC and KANJD212 files. It is intended to be largely
  self-documenting, with each field being accompanied by an explanatory
  comment.

  The file covers the following kanji:
  (a) the 6,355 kanji from JIS X 0208;
  (b) the 5,801 kanji from JIS X 0212;
  (c) the 3,625 kanji from JIS X 0213 as follows:
      (i) the 2,741 kanji which are also in JIS X 0212 have
          JIS X 0213 code-points (kuten) added to the existing entry;
      (ii) the 884 "new" kanji have new entries.

  At the end of the explanation for a number of fields there is a tag
  with the format [N]. This indicates the leading letter(s) of the
  equivalent field in the KANJIDIC and KANJD212 files.

  The KANJIDIC documentation should also be read for additional
  details about the content of the file.
-->
<!ELEMENT kanjidic2 (header,character*)>
<!--
  The root element: exactly one header followed by zero or more
  character entries.
-->
<!ELEMENT header (file_version,database_version,date_of_creation)>
<!--
  The single header element will contain identification information
  about the version of the file.
-->
<!ELEMENT file_version (#PCDATA)>
<!--
  This field denotes the version of kanjidic2 structure, as more
  than one version may exist.
-->
<!ELEMENT database_version (#PCDATA)>
<!--
  The version of the file, in the format YYYY-NN, where NN will be
  a number starting with 01 for the first version released in a
  calendar year, then increasing for each version in that year.
-->
<!ELEMENT date_of_creation (#PCDATA)>
<!--
  The date the file was created in international format (YYYY-MM-DD).
-->
<!ELEMENT character (literal,codepoint, radical, misc, dic_number?, query_code?, reading_meaning?,nanori?)*>
<!--
  One entry per kanji. Within an entry the children appear in the order
  literal, codepoint, radical, misc, followed by the optional dic_number,
  query_code, reading_meaning and nanori elements.
  NOTE(review): the trailing "*" makes the whole group repeatable (and
  permits an empty character element), and nanori is also declared as a
  child of reading_meaning. The content model is kept exactly as
  published so that existing documents continue to validate; confirm
  whether both points are intended.
-->
<!ELEMENT literal (#PCDATA)>
<!--
  The character itself, encoded in UTF-8.
-->
<!ELEMENT codepoint (cp_value+)>
<!--
  The codepoint element states the code of the character in the various
  character set standards.
-->
<!ELEMENT cp_value (#PCDATA)>
<!--
  The cp_value contains the codepoint of the character in a particular
  standard. The standard will be identified in the cp_type attribute.
-->
<!ATTLIST cp_value cp_type CDATA #REQUIRED>
<!--
  The cp_type attribute states the coding standard applying to the
  element. The values assigned so far are:
    jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
    jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
    jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
    ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
-->
<!ELEMENT radical (rad_value+)>
<!--
  The radical element holds one or more rad_value elements, one per
  radical classification recorded for the kanji.
-->
<!ELEMENT rad_value (#PCDATA)>
<!--
  The radical number, in the range 1 to 214. The particular
  classification type is stated in the rad_type attribute.
-->
<!ATTLIST rad_value rad_type CDATA #REQUIRED>
<!--
  The rad_type attribute states the type of radical classification.
    classical - as recorded in the KangXi Zidian.
    nelson - as used in the Nelson "Modern Japanese-English
      Character Dictionary" (i.e. the Classic, not the New Nelson).
      This will only be used where Nelson reclassified the kanji.
-->
<!ELEMENT misc (grade?, stroke_count+, variant*, freq*, rad_name*)>
<!--
  Miscellaneous information about the kanji: an optional grade, one or
  more stroke_count elements, and any number of variant, freq and
  rad_name elements, in that order.
-->
<!ELEMENT grade (#PCDATA)>
<!--
  The Jouyou Kanji grade level. 1 through 6 indicate the grade in which
  the kanji is taught in Japanese schools. 8 indicates it is one of the
  remaining Jouyou Kanji to be learned in junior high school, and 9
  indicates it is a Jinmeiyou (for use in names) kanji. [G]
-->
<!ELEMENT stroke_count (#PCDATA)>
<!--
  The stroke count of the kanji, including the radical. If more than
  one, the first is considered the accepted count, while subsequent ones
  are common miscounts. (See Appendix E. of the KANJIDIC documentation
  for some of the rules applied when counting strokes in some of the
  radicals.) [S]
-->
<!ELEMENT variant (#PCDATA)>
<!--
  A cross-reference code to another kanji, usually regarded as a variant.
  The type of cross-reference is given in the var_type attribute.
-->
<!ATTLIST variant var_type CDATA #REQUIRED>
<!--
  The var_type attribute indicates the type of variant code. The current
  values are:
    jis208 - in JIS X 0208 - kuten coding
    jis212 - in JIS X 0212 - kuten coding
    jis213 - in JIS X 0213 - kuten coding
    deroo - De Roo number - numeric
    njecd - Halpern NJECD index number - numeric
    s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
    nelson - "Classic" Nelson - numeric
    oneill - Japanese Names (O'Neill) - numeric
-->
<!ELEMENT freq (#PCDATA)>
<!--
  A frequency-of-use ranking. The 2,500 most-used characters have a
  ranking; those characters that lack this field are not ranked. The
  frequency is a number from 1 to 2,500 that expresses the relative
  frequency of occurrence of a character in modern Japanese. This is
  based on a survey in newspapers, so it is biased towards kanji
  used in newspaper articles. The discrimination between the less
  frequently used kanji is not strong.
-->
<!ELEMENT rad_name (#PCDATA)>
<!--
  When the kanji is itself a radical and has a name, this element
  contains the name (in hiragana). [T2]
-->
<!ELEMENT dic_number (dic_ref+)>
<!--
  This element contains the index numbers and similar unstructured
  information such as page numbers in a number of published dictionaries,
  and instructional books on kanji.
-->
<!ELEMENT dic_ref (#PCDATA)>
<!--
  Each dic_ref contains an index number. The particular dictionary,
  etc. is defined by the dr_type attribute.
-->
<!ATTLIST dic_ref dr_type CDATA #REQUIRED>
<!--
  The dr_type defines the dictionary or reference book, etc. to which
  the dic_ref element applies. The initial allocation is:
    nelson_c - "Modern Reader's Japanese-English Character Dictionary",
      edited by Andrew Nelson (now published as the "Classic"
      Nelson).
    nelson_n - "The New Nelson Japanese-English Character Dictionary",
      edited by John Haig.
    halpern_njecd - "New Japanese-English Character Dictionary",
      edited by Jack Halpern.
    halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by
      Jack Halpern.
    heisig - "Remembering The Kanji" by James Heisig.
    gakken - "A New Dictionary of Kanji Usage" (Gakken)
    oneill_names - "Japanese Names", by P.G. O'Neill.
    oneill_kk - "Essential Kanji" by P.G. O'Neill.
    moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
      additional attributes are used: m_vol: the volume of the
      dictionary in which the kanji is found, and m_page: the page
      number in the volume.
    henshall - "A Guide To Remembering Japanese Characters" by
      Kenneth G. Henshall.
    sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
    sakade - "A Guide To Reading and Writing Japanese" edited by
      Florence Sakade.
    henshall3 - "A Guide To Reading and Writing Japanese" 3rd
      edition, edited by Henshall, Seeley and De Groot.
    tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
    crowley - "The Kanji Way to Japanese Language Power" by
      Dale Crowley.
    kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
    busy_people - "Japanese For Busy People" vols I-III, published
      by the AJLT. The codes are the volume.chapter.
    kodansha_compact - the "Kodansha Compact Kanji Guide".
-->
<!ATTLIST dic_ref m_vol CDATA #IMPLIED>
<!--
  See above under "moro".
-->
<!ATTLIST dic_ref m_page CDATA #IMPLIED>
<!--
  See above under "moro".
-->
<!ELEMENT query_code (q_code+)>
<!--
  These codes contain information relating to the glyph, and can be used
  for finding a required kanji. The type of code is defined by the
  qc_type attribute.
-->
<!ELEMENT q_code (#PCDATA)>
<!--
  The q_code contains the actual query-code value, according to the
  qc_type attribute.
-->
<!ATTLIST q_code qc_type CDATA #REQUIRED>
<!--
  The qc_type attribute defines the type of query code. The current
  values are:
    skip - Halpern's SKIP (System of Kanji Indexing by Patterns)
      code. The format is n-nn-nn. See the KANJIDIC documentation
      for a description of the code and restrictions on the
      commercial use of this data. [P]
    sh_desc - the descriptor codes for The Kanji Dictionary (Tuttle
      1996) by Spahn and Hadamitzky. They are in the form nxnn.n,
      e.g. 3k11.2, where the kanji has 3 strokes in the
      identifying radical, it is radical "k" in the SH
      classification system, there are 11 other strokes, and it is
      the 2nd kanji in the 3k11 sequence. (I am very grateful to
      Mark Spahn for providing the list of these descriptor codes
      for the kanji in this file.) [I]
    four_corner - the "Four Corner" code for the kanji. This is a code
      invented by Wang Chen in 1928. See the KANJIDIC documentation
      for an overview of the Four Corner System. [Q]
    deroo - the codes developed by the late Father Joseph De Roo, and
      published in his book "2001 Kanji" (Bojinsha). Fr De Roo
      gave his permission for these codes to be included. [DR]
    misclass - a possible misclassification of the kanji according
      to one of the code types. (See the "Z" codes in the KANJIDIC
      documentation for more details.)
-->
<!ELEMENT reading_meaning (rmgroup*, nanori*)>
<!--
  The readings for the kanji in several languages, and the meanings, also
  in several languages. The readings and meanings are grouped to enable
  the handling of the situation where the meaning is differentiated by
  reading. [T1]
-->
<!ELEMENT nanori (#PCDATA)>
<!--
  Japanese readings that are now only associated with names.
-->
<!ELEMENT rmgroup (reading*, meaning*)>
<!--
  One reading/meaning group: zero or more reading elements followed by
  zero or more meaning elements.
-->
<!ELEMENT reading (#PCDATA)>
<!--
  The reading element contains the reading or pronunciation
  of the kanji.
-->
<!ATTLIST reading r_type CDATA #REQUIRED>
<!--
  The r_type attribute defines the type of reading in the reading
  element. The current values are:
    pinyin - the modern PinYin romanization of the Chinese reading
      of the kanji. The tones are represented by a concluding
      digit. [Y]
    korean_r - the romanized form of the Korean reading(s) of the
      kanji. The readings are in the (Republic of Korea) Ministry
      of Education style of romanization. [W]
    korean_h - the Korean reading(s) of the kanji in hangul.
    ja_on - the "on" Japanese reading of the kanji, in katakana. A
      second attribute r_status, if present, will indicate with
      a value of "jy" whether the reading is approved for a
      "Jouyou kanji".
    ja_kun - the "kun" Japanese reading of the kanji, in hiragana.
      Where relevant the okurigana is also included separated by a
      ".". Readings associated with prefixes and suffixes are
      marked with a "-". A second attribute r_status, if present,
      will indicate with a value of "jy" whether the reading is
      approved for a "Jouyou kanji".
-->
<!ATTLIST reading r_status CDATA #IMPLIED>
<!--
  See under ja_on and ja_kun above.
-->
<!ELEMENT meaning (#PCDATA)>
<!--
  The meaning associated with the kanji.
-->
<!ATTLIST meaning m_lang CDATA #IMPLIED>
<!--
  The m_lang attribute defines the target language of the meaning. It
  will be coded using the two-letter language code from the ISO 639
  standard. When absent, the value "en" (i.e. English) is implied. [{}]
-->
]>
<!--
  Empty placeholder root element.
  NOTE(review): the DTD above requires kanjidic2 to contain a header, so
  this stub is well-formed but will not validate until the header (and
  any character entries) are supplied.
-->
<kanjidic2>
</kanjidic2>