LCOV - code coverage report
Current view: top level - libutf8 - unicode_data_file.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 4 0.0 %
Date: 2022-07-31 10:17:08 Functions: 0 1 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright (c) 2021-2022  Made to Order Software Corp.  All Rights Reserved
       2             : //
       3             : // https://snapwebsites.org/project/libutf8
       4             : // contact@m2osw.com
       5             : //
       6             : // This program is free software; you can redistribute it and/or modify
       7             : // it under the terms of the GNU General Public License as published by
       8             : // the Free Software Foundation; either version 2 of the License, or
       9             : // (at your option) any later version.
      10             : //
      11             : // This program is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : //
      16             : // You should have received a copy of the GNU General Public License along
      17             : // with this program; if not, write to the Free Software Foundation, Inc.,
      18             : // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
      19             : #pragma once
      20             : 
      21             : /** \file
      22             :  * \brief The declarations of the Unicode compiled files.
      23             :  *
      24             :  * This file includes structures used to describe the Unicode compiled
      25             :  * file. This allows us to very quickly find all the information about
      26             :  * a character.
      27             :  *
      28             :  * From the outside, you are expected to use the unicode_character
      29             :  * functions defined in the unicode_data.h header. This file is
      30             :  * considered private.
      31             :  */
      32             : 
      33             : // self
      34             : //
      35             : #include    <libutf8/unicode_data.h>
      36             : 
      37             : 
      38             : // C++
      39             : //
      40             : #include    <string>
      41             : 
      42             : 
      43             : 
      44             : namespace libutf8
      45             : {
      46             : 
      47             : namespace detail
      48             : {
      49             : 
      50             : 
      51             : enum class Name_Type : std::uint8_t  // one-byte tag found in front of each name entry of the string table; see UnicodeData.txt and NameAliases.txt
      52             : {
      53             :     NT_Name = 0xF0,             // presumably the main (corrected) name, which comes first in the list -- confirm with the generator
      54             :     NT_Abbreviation = 0xF1,     // presumably the "abbreviation" alias type of NameAliases.txt -- TODO confirm
      55             :     NT_Jamo_Short_Name = 0xF2,  // see Jamo.txt
      56             :     NT_Alternate = 0xF3,        // presumably the "alternate" alias type of NameAliases.txt -- TODO confirm
      57             :     NT_Control = 0xF4,          // presumably the "control" alias type of NameAliases.txt -- TODO confirm
      58             :     NT_WrongName = 0xF5,        // the main name is the corrected name, this name is the invalid/incorrect name
      59             :     NT_Figment = 0xF6,          // presumably the "figment" alias type of NameAliases.txt -- TODO confirm
      60             :     NT_Numeric = 0xF7,          // not a name: a value saved as two int64_t in the strings because that's under 8kb that way
      61             : 
      62             :     NT_EndOfNames = 0xFF,       // special type marking the end of the list of names of a character
      63             : };
      64             : 
      65             : 
      66             : 
      67             : struct ucd_header               // describes the compiled Unicode file: signature, versions, and offsets to its tables
      68             : {
      69             :     char                f_magic[4] = { 'U', 'C', 'D', 'B' };    // file signature, the four characters U, C, D, B (no null terminator)
      70             :     time_t              f_timestamp = 0;                // time when this file was generated -- NOTE(review): sizeof(time_t) is platform dependent, so this layout may vary between builds; confirm the file is never shared across platforms
      71             :     std::uint8_t        f_version = 0;                  // version of this file format
      72             :     std::uint8_t        f_ucd_version[3] = { 1, 1, 0 }; // version of source -- i.e. 5 2 0; defaults to 1 1 0
      73             :     std::uint32_t       f_characters = 0;               // offset to character table (presumably from the start of the file -- confirm)
      74             :     std::uint32_t       f_strings = 0;                  // offset to string table (presumably from the start of the file -- confirm)
      75             :     std::uint32_t       f_decomposition = 0;            // offset to decomposition table (presumably from the start of the file -- confirm)
      76             : };
      77             : 
      78             : 
      79             : 
      80             : typedef std::uint8_t        flags_t;    // set of UCD_FLAG_... bits, the type of ucd_character::f_flags
      81             : 
      82             : constexpr flags_t           UCD_FLAG_DIGIT              = 0x01; // represents a number (presumably the UnicodeData.txt "digit" field is defined -- confirm)
      83             : constexpr flags_t           UCD_FLAG_DECIMAL            = 0x02; // represents a number (presumably the UnicodeData.txt "decimal" field is defined -- confirm)
      84             : constexpr flags_t           UCD_FLAG_NUMERIC            = 0x04; // represents a number (presumably the UnicodeData.txt "numeric" field is defined -- confirm)
      85             : constexpr flags_t           UCD_FLAG_BIDI_MIRROR        = 0x08; // mirror of another letter left to right vs. right to left
      86             : constexpr std::uint8_t      UCD_FLAG_CONTROL            = 0x10; // presumably marks control characters -- TODO confirm; spelled std::uint8_t, which is the same type as flags_t
      87             : constexpr std::uint8_t      UCD_FLAG_PRIVATE            = 0x20; // presumably marks private use characters -- TODO confirm; bits 0x40 and 0x80 have no constant defined here
      88             : 
      89             : 
      90             : 
      91             : struct ucd_character            // properties of one character (presumably one entry of the character table -- confirm); the /* N */ prefixes give each field's size in bits
      92             : {
      93           0 :     constexpr ucd_character()   // gives the three bit-fields their defaults; bit-fields only accept default member initializers since C++20
      94           0 :         : f_decomposition_type(static_cast<int>(Decomposition_Type::DT_unknown))    // unknown decomposition type by default
      95             :         , f_decomposition_length(0)
      96           0 :         , f_decomposition_mapping(0)
      97             :     {
      98           0 :     }
      99             : 
     100             :     /* 32 */    char32_t                    f_code = NOT_A_CHARACTER;   // character code; NOT_A_CHARACTER until set
     101             :     /* 32 */    std::uint32_t               f_names = 0;        // offset to string table
     102             :     /*  8 */    flags_t                     f_flags = 0;        // combination of UCD_FLAG_... bits
     103             :     /*  8 */    General_Category            f_general_category = General_Category::GC_Unknown_Category;
     104             :     /*  8 */    Canonical_Combining_Class   f_canonical_combining_class = Canonical_Combining_Class::CCC_Not_Reordered;
     105             :     /*  8 */    Bidi_Class                  f_bidi_class = Bidi_Class::BC_Unknown;
     106             :     /*  5 */    std::uint32_t               f_decomposition_type : 5;       // a Decomposition_Type value stored on 5 bits
     107             :     /*  5 */    std::uint32_t               f_decomposition_length : 5;     // presumably the number of characters in the decomposition (at most 31) -- confirm
     108             :     /* 22 */    std::uint32_t               f_decomposition_mapping : 22;   // presumably a position in the decomposition table -- confirm; the three bit-fields add up to 32 bits
     109             :     /* 16 */    std::uint8_t                f_age[2] = { 1, 1 };            // presumably the Unicode version (major, minor) which introduced the character -- TODO confirm
     110             : };
     111             : 
     112             : // The f_names is an offset in the string table.
     113             : //
     114             : // Each name is defined as:
     115             : //
     116             : //     struct name_t
     117             : //     {
     118             : //         Name_Type    f_type;
     119             : //         uint8_t      f_size;
     120             : //         char8_t      f_name[f_size];
     121             : //     };
     122             : //
     123             : // Names are not null terminated; each one is f_size bytes of UTF-8
     124             : // and is followed by the byte representing the next Name_Type. The
     125             : // list of names ends with the special type NT_EndOfNames.
     126             : //
     127             : // The first name is the corrected name of the character.
     128             : //
     129             : // Following are the other Name_Type names.
     130             : //
     131             : // The numeric entries are actually two 64 bit numbers (numerator and
     132             : // denominator). The size will always be 16 bytes, but the alignment
     133             : // is likely going to be "wrong" (although that should not matter much
     134             : // on Intel and ARM processors).
     135             : 
     136             : 
     137             : 
     138             : 
     139             : } // detail namespace
     140             : 
     141             : } // libutf8 namespace
     142             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.13