# kicad-source/cmake/BuildSteps/TokenList2DsnLexer.cmake

# This program source code file is part of KICAD, a free EDA CAD application.
#
# Copyright (C) 2010 Wayne Stambaugh <stambaughw@verizon.net>
# Copyright (C) 2010 Kicad Developers, see AUTHORS.txt for contributors.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you may find one here:
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# or you may search the http://www.gnu.org website for the version 2 license,
# or you may write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#
#
# This script converts a plain text file with a line feed separated list
# of token names into the appropriate source and header files required by
# the DSN lexer. See files "<base_source_path>/common/dsnlexer.cpp" and
# "<base_source_path>/include/dsnlexer.h" for more information about how
# the DSN lexer works. The token list file format requires a single token
# per line. Tokens can only contain lower case letters, numbers, and
# underscores. The first letter of each token must be a lower case letter.
# Tokens must be unique. If any of the above criteria are not met, the
# source and header files will not be generated and a build error will
# occur.
#
# Valid tokens: a a1 foo_1 foo_bar2
# Invalid tokens: 1 A _foo bar_ foO
#
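# As a sketch, a hypothetical "example.keywords" input file could look like
# this (one token per line; blank lines are skipped and anything from "#" to
# the end of a line is stripped before validation):
#
#     layer
#     net_class
#     pad1
#     via_size      # trailing comment, removed by this script
#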
# Invocation Parameters are: enum, inputFile, outCppFile, outHeaderFile
#
# enum - Required, namespace in which the enum T will be placed.
# Keep it short because from outside the class you want a short enum name
# like enum::T. Enums are contained in their own namespace to avoid
# collisions on enum value names, a problem with C++ unless the enum
# itself is in a separate namespace.
#
# inputFile - Required, name of the token list file, or "*.keywords" file.
# Choose the base file name carefully, since it determines the class name
# used in the generated *_lexer.h file.
#
# outCppFile - Optional, full path and file name of where to save the generated
# cpp keywords file. If not defined, the output path is the same
# path as the token list file path, with a file name of *_keywords.cpp
#
# outHeaderFile - Optional, full path and file name of where to save the generated
# *.h lexer file. If not defined, the output path is the same
# path as the token list file path, with a file name of *_lexer.h
#
# Use the make_lexer() CMake function from functions.cmake for invocation convenience.
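#
# A minimal stand-alone invocation sketch (the file names below are
# hypothetical; a normal build drives this script through build rules
# generated by the function mentioned above):
#
#     cmake -Denum=EXAMPLE_T \
#           -DinputFile=example.keywords \
#           -DoutCppFile=example_keywords.cpp \
#           -DoutHeaderFile=example_lexer.h \
#           -P TokenList2DsnLexer.cmake
#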
#message( STATUS "TokenList2DsnLexer.cmake" ) # indicate we are running
set( tokens "" )
set( lineCount 0 )
set( dsnErrorMsg "TokenList2DsnLexer.cmake failure:" )
if( NOT EXISTS ${inputFile} )
    message( FATAL_ERROR "${dsnErrorMsg} file ${inputFile} cannot be found." )
endif()

if( NOT DEFINED enum )
    message( FATAL_ERROR "${dsnErrorMsg} missing \"enum\" processing ${inputFile}." )
endif()
get_filename_component( outputPath "${inputFile}" PATH )
# the keywords filename without extension is important, it sets the classname into RESULT
get_filename_component( result "${inputFile}" NAME_WE )
string( TOUPPER "${result}" RESULT )
set( LEXERCLASS "${RESULT}_LEXER" )
set( PARSERCLASS "${RESULT}_PARSER" )
#message( "enum:'${enum}' result:'${result}' outputPath:'${outputPath}' inputFile:'${inputFile}'" )
if( NOT DEFINED outCppFile )
    set( outCppFile "${outputPath}/${result}_keywords.cpp" )
endif()

if( NOT DEFINED outHeaderFile )
    set( outHeaderFile "${outputPath}/${result}_lexer.h" )
endif()
# Create tag for generating header file.
set( headerTag "${LEXERCLASS}_H_" )
set( includeFileHeader
"
/* Do not modify this file; it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 */
#ifndef ${headerTag}
#define ${headerTag}
#include <dsnlexer.h>
/**
 * C++ does not put enum _values_ in separate namespaces unless the enum itself
 * is in a separate namespace. All the token enums must be in separate namespaces,
 * otherwise the C++ compiler will eventually complain if it sees more than one
 * DSNLEXER in the same compilation unit, say by multiple header file inclusion.
 * This also enables re-use of the same enum name T. A typedef can always be used
 * to clarify which enum T is in play should that ever be a problem. This is
 * unlikely since Parse() functions will usually only be exposed to one header
 * file like this one. But if there is a problem, then use:
 *     typedef ${enum}::T T;
 * within that problem area.
 */
namespace ${enum}
{
/// enum T contains all this lexer's tokens.
enum T
{
// these first few are negative special ones for syntax, and are
// inherited from DSNLEXER.
T_NONE = DSN_NONE,
T_COMMENT = DSN_COMMENT,
T_STRING_QUOTE = DSN_STRING_QUOTE,
T_QUOTE_DEF = DSN_QUOTE_DEF,
T_DASH = DSN_DASH,
T_SYMBOL = DSN_SYMBOL,
T_NUMBER = DSN_NUMBER,
T_RIGHT = DSN_RIGHT, // right bracket: ')'
T_LEFT = DSN_LEFT, // left bracket: '('
T_STRING = DSN_STRING, // a quoted string, stripped of the quotes
T_EOF = DSN_EOF, // special case for end of file
"
)
set( sourceFileHeader
"
/* Do not modify this file; it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 *
 * Include this file in your lexer class to provide the keywords for
 * your DSN lexer.
 */
#include <${outHeaderFile}>
using namespace ${enum};
#define TOKDEF(x) { #x, T_##x }
const KEYWORD ${LEXERCLASS}::keywords[] = {
"
)
file( STRINGS ${inputFile} lines NO_HEX_CONVERSION )
foreach( line ${lines} )
    math( EXPR lineCount "${lineCount} + 1" )

    # strip any comment from # to end of line
    string( REGEX REPLACE "#.*$" "" tmpToken "${line}" )
    string( STRIP "${tmpToken}" token )

    # Ignore empty lines.
    if( NOT token STREQUAL "" )    # a plain if( token ) is false when token is "off", "no", etc.
        # Make sure token is valid.
        #message( "token=${token}" )
        string( REGEX MATCH "[a-z][_0-9a-z]*" validToken "${token}" )
        #message( "validToken=${validToken}" )

        if( validToken STREQUAL token )
            list( APPEND tokens "${validToken}" )
        else()
            message( FATAL_ERROR
                "Invalid token string \"${tmpToken}\" at line ${lineCount} in file "
                "<${inputFile}>." )
        endif()
    endif()
endforeach()
list( SORT tokens )
# Check for duplicates.
list( LENGTH tokens tokensBefore )
list( REMOVE_DUPLICATES tokens )
list( LENGTH tokens tokensAfter )
if( NOT ( tokensBefore EQUAL tokensAfter ) )
    message( FATAL_ERROR "Duplicate tokens found in file <${inputFile}>." )
endif()
file( WRITE "${outHeaderFile}" "${includeFileHeader}" )
file( WRITE "${outCppFile}" "${sourceFileHeader}" )
set( lineCount 1 )
foreach( token ${tokens} )
    if( lineCount EQUAL 1 )
        file( APPEND "${outHeaderFile}" " T_${token} = 0" )
    else()
        file( APPEND "${outHeaderFile}" " T_${token}" )
    endif()

    file( APPEND "${outCppFile}" " TOKDEF( ${token} )" )

    if( lineCount EQUAL tokensAfter )
        file( APPEND "${outHeaderFile}" "\n" )
        file( APPEND "${outCppFile}" "\n" )
    else()
        file( APPEND "${outHeaderFile}" ",\n" )
        file( APPEND "${outCppFile}" ",\n" )
    endif()

    math( EXPR lineCount "${lineCount} + 1" )
endforeach()
file( APPEND "${outHeaderFile}"
" };
} // namespace ${enum}
/**
 * Class ${LEXERCLASS}
 * is an automatically generated class using the TokenList2DsnLexer.cmake
 * technology, based on keywords provided by file:
 *    ${inputFile}
 */
class ${LEXERCLASS} : public DSNLEXER_KEYWORDED<${enum}::T>
{
public:
    using DSNLEXER_KEYWORDED<${enum}::T>::DSNLEXER_KEYWORDED;
};

// example usage
/**
 * Class ${PARSERCLASS}
 * holds data and functions pertinent to parsing an S-expression file.
 *
class ${PARSERCLASS} : public ${LEXERCLASS}
{
};
*/
#endif // ${headerTag}
"
)
file( APPEND "${outCppFile}"
"};
const unsigned ${LEXERCLASS}::keyword_count = unsigned( sizeof( ${LEXERCLASS}::keywords )/sizeof( ${LEXERCLASS}::keywords[0] ) );
"
)
file( APPEND "${outCppFile}"
"
const KEYWORD_MAP ${LEXERCLASS}::keywords_hash({
"
)
set( TOKEN_NUM 0 )
math( EXPR tokensAfter "${tokensAfter} - 1" )

foreach( token ${tokens} )
    file( APPEND "${outCppFile}" " { \"${token}\", ${TOKEN_NUM} }" )

    if( TOKEN_NUM EQUAL tokensAfter )
        file( APPEND "${outCppFile}" "\n" )
    else()
        file( APPEND "${outCppFile}" ",\n" )
    endif()

    math( EXPR TOKEN_NUM "${TOKEN_NUM} + 1" )
endforeach()
file( APPEND "${outCppFile}"
"});")