diff --git a/.gitignore b/.gitignore index ce5a7e2..07f3f91 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ /doc/*.html /doc/*.pdf + +a.out diff --git a/COPYING3 b/COPYING3 new file mode 100644 index 0000000..94a9ed0 --- /dev/null +++ b/COPYING3 @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. diff --git a/Rakefile b/Rakefile index da58e7c..9eb4f04 100644 --- a/Rakefile +++ b/Rakefile @@ -15,6 +15,7 @@ STAGES = Dir.glob('boot/stage*') CLEAN.include 'build/boot', 'build/valid' CLEAN.include 'doc/*.pdf' +CLOBBER.include 'build' def compile(*arguments) sh(ENV.fetch('CC', 'gcc'), '-fpie', '-g', *arguments) diff --git a/config-lang.in b/config-lang.in new file mode 100644 index 0000000..1bc1697 --- /dev/null +++ b/config-lang.in @@ -0,0 +1,21 @@ +# Top level configure fragment for gcc Elna frontend. +# Copyright (C) 2025 Free Software Foundation, Inc. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +language="elna" +gcc_subdir="elna/gcc" + +. ${srcdir}/elna/gcc/config-lang.in diff --git a/frontend/ast.cc b/frontend/ast.cc new file mode 100644 index 0000000..e067937 --- /dev/null +++ b/frontend/ast.cc @@ -0,0 +1,1178 @@ +/* Abstract syntax tree representation. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "elna/frontend/ast.h" + +namespace elna::frontend +{ + void empty_visitor::not_implemented() + { + __builtin_unreachable(); + } + + void empty_visitor::visit(named_type_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(array_type_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(pointer_type_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(program *) + { + not_implemented(); + } + + void empty_visitor::visit(type_declaration *) + { + not_implemented(); + } + + void empty_visitor::visit(record_type_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(union_type_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(procedure_type_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(enumeration_type_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(variable_declaration *) + { + not_implemented(); + } + + void empty_visitor::visit(constant_declaration *) + { + not_implemented(); + } + + void empty_visitor::visit(procedure_declaration *) + { + not_implemented(); + } + + void empty_visitor::visit(assign_statement *) + { + not_implemented(); + } + + void empty_visitor::visit(if_statement *) + { + not_implemented(); + } + + void empty_visitor::visit(import_declaration *) + { + not_implemented(); + } + + void empty_visitor::visit(while_statement *) + { + not_implemented(); + } + + void empty_visitor::visit(return_statement *) + { + not_implemented(); + } + + void empty_visitor::visit(defer_statement *) + { + not_implemented(); + } + + void empty_visitor::visit(case_statement *) + { + not_implemented(); + } + + void empty_visitor::visit(procedure_call *) + { + not_implemented(); + } + + void empty_visitor::visit(unit *) + { + not_implemented(); + } + + void empty_visitor::visit(cast_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(traits_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(binary_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(unary_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(variable_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(array_access_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(field_access_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(dereference_expression *) + { + not_implemented(); + } + + void empty_visitor::visit(literal *) + { + not_implemented(); + } + + void empty_visitor::visit(literal *) + { + not_implemented(); + } + + void empty_visitor::visit(literal *) + { + not_implemented(); + } + + void empty_visitor::visit(literal *) + { + not_implemented(); + } + + void empty_visitor::visit(literal *) + { + not_implemented(); + } + + void empty_visitor::visit(literal *) + { + not_implemented(); + } + + void empty_visitor::visit(literal *) + { + not_implemented(); + } + + node::node(const struct position position) + : source_position(position) + { + } + + node::~node() + { + } + + const struct position& node::position() const + { + return this->source_position; + } + + cast_expression *expression::is_cast() + { + return nullptr; + } + + traits_expression *expression::is_traits() + { + return nullptr; + } + + binary_expression *expression::is_binary() + { + return nullptr; + } + + unary_expression *expression::is_unary() + { + return nullptr; + } + + designator_expression *expression::is_designator() + { + return nullptr; + } + + procedure_call *expression::is_call_expression() + { + return nullptr; + } + + literal_expression *expression::is_literal() + { + return nullptr; + } + + type_expression::type_expression(const struct position position) + : node(position) + { + } + + named_type_expression *type_expression::is_named() + { + return nullptr; + } + + array_type_expression *type_expression::is_array() + { + return nullptr; + } + + pointer_type_expression *type_expression::is_pointer() + { + return nullptr; + } + + record_type_expression *type_expression::is_record() + { + return nullptr; + } + + union_type_expression *type_expression::is_union() + { + return nullptr; + } + + procedure_type_expression *type_expression::is_procedure() + { + return nullptr; + } + + enumeration_type_expression *type_expression::is_enumeration() + { + return nullptr; + } + + named_type_expression::named_type_expression(const struct position position, const std::string& name) + : type_expression(position), name(name) + { + } + + void named_type_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + named_type_expression *named_type_expression::is_named() + { + return this; + } + + array_type_expression::array_type_expression(const struct position position, + type_expression *base, const std::uint32_t size) + : type_expression(position), m_base(base), size(size) + { + } + + array_type_expression::~array_type_expression() + { + delete m_base; + } + + void array_type_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + array_type_expression *array_type_expression::is_array() + { + return this; + } + + type_expression& array_type_expression::base() + { + return *m_base; + } + + pointer_type_expression::pointer_type_expression(const struct position position, + type_expression *base) + : type_expression(position), m_base(base) + { + } + + pointer_type_expression::~pointer_type_expression() + { + delete m_base; + } + + void pointer_type_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + pointer_type_expression *pointer_type_expression::is_pointer() + { + return this; + } + + type_expression& pointer_type_expression::base() + { + return *m_base; + } + + record_type_expression::record_type_expression(const struct position position, + std::vector&& fields) + : type_expression(position), fields(std::move(fields)) + { + } + + record_type_expression::~record_type_expression() + { + for (const field_declaration& field : this->fields) + { + delete field.second; + } + } + + void record_type_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + record_type_expression *record_type_expression::is_record() + { + return this; + } + + union_type_expression::union_type_expression(const struct position position, + std::vector&& fields) + : type_expression(position), fields(std::move(fields)) + { + } + + union_type_expression::~union_type_expression() + { + for (const field_declaration& field : this->fields) + { + delete field.second; + } + } + + void union_type_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + union_type_expression *union_type_expression::is_union() + { + return this; + } + + variable_declaration::variable_declaration(const struct position position, + std::vector&& identifier, std::shared_ptr variable_type, + expression *body) + : node(position), m_variable_type(variable_type), identifiers(std::move(identifier)), body(body) + { + } + + variable_declaration::variable_declaration(const struct position position, + std::vector&& identifier, std::shared_ptr variable_type, + std::monostate) + : node(position), m_variable_type(variable_type), identifiers(std::move(identifier)), is_extern(true) + { + } + + void variable_declaration::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + bool variable_declaration::has_initializer() const + { + return this->is_extern || this->body != nullptr; + } + + type_expression& variable_declaration::variable_type() + { + return *m_variable_type; + } + + declaration::declaration(const struct position position, identifier_definition identifier) + : node(position), identifier(identifier) + { + } + + constant_declaration::constant_declaration(const struct position position, identifier_definition identifier, + expression *body) + : declaration(position, identifier), m_body(body) + { + } + + void constant_declaration::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + expression& constant_declaration::body() + { + return *m_body; + } + + constant_declaration::~constant_declaration() + { + delete m_body; + } + + procedure_type_expression::procedure_type_expression(const struct position position, return_t return_type) + : type_expression(position), return_type(return_type) + { + } + + procedure_type_expression::~procedure_type_expression() + { + if (return_type.proper_type != nullptr) + { + delete return_type.proper_type; + } + for (const type_expression *parameter : this->parameters) + { + delete parameter; + } + } + + void procedure_type_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + procedure_type_expression *procedure_type_expression::is_procedure() + { + return this; + } + + enumeration_type_expression::enumeration_type_expression(const struct position position, std::vector&& members) + : type_expression(position), members(members) + { + } + + void enumeration_type_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + enumeration_type_expression *enumeration_type_expression::is_enumeration() + { + return this; + } + + procedure_declaration::procedure_declaration(const struct position position, identifier_definition identifier, + procedure_type_expression *heading, block&& body) + : declaration(position, identifier), m_heading(heading), body(std::move(body)) + { + } + + procedure_declaration::procedure_declaration(const struct position position, identifier_definition identifier, + procedure_type_expression *heading) + : declaration(position, identifier), m_heading(heading), body(std::nullopt) + { + } + + void procedure_declaration::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + procedure_type_expression& procedure_declaration::heading() + { + return *m_heading; + } + + procedure_declaration::~procedure_declaration() + { + delete m_heading; + } + + type_declaration::type_declaration(const struct position position, identifier_definition identifier, + type_expression *body) + : declaration(position, identifier), m_body(body) + { + } + + type_declaration::~type_declaration() + { + delete m_body; + } + + void type_declaration::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + type_expression& type_declaration::body() + { + return *m_body; + } + + block::block(std::vector&& constants, std::vector&& variables, + std::vector&& body) + : m_variables(std::move(variables)), m_constants(std::move(constants)), m_body(std::move(body)) + { + } + + block::block(block&& that) + : m_variables(std::move(that.m_variables)), m_constants(std::move(that.m_constants)), + m_body(std::move(that.m_body)) + { + } + + block& block::operator=(block&& that) + { + std::swap(m_variables, that.m_variables); + std::swap(m_constants, that.m_constants); + std::swap(m_body, that.m_body); + + return *this; + } + + const std::vector& block::variables() + { + return m_variables; + } + + const std::vector& block::constants() + { + return m_constants; + } + + const std::vector& block::body() + { + return m_body; + } + + block::~block() + { + for (statement *body_statement : this->body()) + { + delete body_statement; + } + for (variable_declaration *variable : this->variables()) + { + delete variable; + } + for (constant_declaration *constant : this->constants()) + { + delete constant; + } + } + + unit::unit(const struct position position) + : node(position) + { + } + + void unit::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + unit::~unit() + { + for (procedure_declaration *procedure : this->procedures) + { + delete procedure; + } + for (variable_declaration *variable : this->variables) + { + delete variable; + } + for (type_declaration *type : this->types) + { + delete type; + } + for (constant_declaration *constant : this->constants) + { + delete constant; + } + for (import_declaration *declaration : this->imports) + { + delete declaration; + } + } + + program::program(const struct position position) + : unit(position) + { + } + + void program::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + program::~program() + { + for (statement *body_statement : this->body) + { + delete body_statement; + } + } + + literal_expression::literal_expression() + { + } + + literal_expression *literal_expression::is_literal() + { + return this; + } + + defer_statement::defer_statement(const struct position position, std::vector&& statements) + : node(position), statements(std::move(statements)) + { + } + + void defer_statement::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + defer_statement::~defer_statement() + { + for (statement *body_statement : statements) + { + delete body_statement; + } + } + + designator_expression::designator_expression() + { + } + + designator_expression::~designator_expression() + { + } + + designator_expression *designator_expression::is_designator() + { + return this; + } + + void designator_expression::accept(parser_visitor *visitor) + { + if (variable_expression *node = is_variable()) + { + return visitor->visit(node); + } + else if (array_access_expression *node = is_array_access()) + { + return visitor->visit(node); + } + else if (field_access_expression *node = is_field_access()) + { + return visitor->visit(node); + } + else if (dereference_expression *node = is_dereference()) + { + return visitor->visit(node); + } + __builtin_unreachable(); + } + + variable_expression::variable_expression(const struct position position, const std::string& name) + : node(position), name(name) + { + } + + void variable_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + variable_expression *variable_expression::is_variable() + { + return this; + } + + array_access_expression::array_access_expression(const struct position position, + expression *base, expression *index) + : node(position), m_base(base), m_index(index) + { + } + + void array_access_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + expression& array_access_expression::index() + { + return *m_index; + } + + expression& array_access_expression::base() + { + return *m_base; + } + + array_access_expression *array_access_expression::is_array_access() + { + return this; + } + + array_access_expression::~array_access_expression() + { + delete m_index; + delete m_base; + } + + field_access_expression::field_access_expression(const struct position position, + expression *base, const std::string& field) + : node(position), m_base(base), m_field(field) + { + } + + void field_access_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + expression& field_access_expression::base() + { + return *m_base; + } + + std::string& field_access_expression::field() + { + return m_field; + } + + field_access_expression *field_access_expression::is_field_access() + { + return this; + } + + field_access_expression::~field_access_expression() + { + delete m_base; + } + + dereference_expression::dereference_expression(const struct position position, + expression *base) + : node(position), m_base(base) + { + } + + void dereference_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + expression& dereference_expression::base() + { + return *m_base; + } + + dereference_expression *dereference_expression::is_dereference() + { + return this; + } + + dereference_expression::~dereference_expression() + { + delete m_base; + } + + binary_expression::binary_expression(const struct position position, expression *lhs, + expression *rhs, const binary_operator operation) + : node(position), m_lhs(lhs), m_rhs(rhs), m_operator(operation) + { + } + + void binary_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + binary_expression *binary_expression::is_binary() + { + return this; + } + + expression& binary_expression::lhs() + { + return *m_lhs; + } + + expression& binary_expression::rhs() + { + return *m_rhs; + } + + binary_operator binary_expression::operation() const + { + return m_operator; + } + + binary_expression::~binary_expression() + { + delete m_lhs; + delete m_rhs; + } + + unary_expression::unary_expression(const struct position position, expression *operand, + const unary_operator operation) + : node(position), m_operand(std::move(operand)), m_operator(operation) + { + } + + void unary_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + unary_expression *unary_expression::is_unary() + { + return this; + } + + expression& unary_expression::operand() + { + return *m_operand; + } + + unary_operator unary_expression::operation() const + { + return this->m_operator; + } + + unary_expression::~unary_expression() + { + delete m_operand; + } + + procedure_call::procedure_call(const struct position position, designator_expression *callable) + : node(position), m_callable(callable) + { + } + + void procedure_call::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + procedure_call *procedure_call::is_call_expression() + { + return this; + } + + designator_expression& procedure_call::callable() + { + return *m_callable; + } + + procedure_call::~procedure_call() + { + for (expression *const argument : arguments) + { + delete argument; + } + delete m_callable; + } + + cast_expression::cast_expression(const struct position position, type_expression *target, expression *value) + : node(position), m_target(target), m_value(value) + { + } + + void cast_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + cast_expression *cast_expression::is_cast() + { + return this; + } + + type_expression& cast_expression::target() + { + return *m_target; + } + + expression& cast_expression::value() + { + return *m_value; + } + + cast_expression::~cast_expression() + { + delete m_target; + delete m_value; + } + + traits_expression::traits_expression(const struct position position, const std::string& name) + : node(position), name(name) + { + } + + traits_expression::~traits_expression() + { + for (const type_expression *parameter : this->parameters) + { + delete parameter; + } + } + + void traits_expression::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + traits_expression *traits_expression::is_traits() + { + return this; + } + + conditional_statements::conditional_statements(expression *prerequisite, std::vector&& statements) + : m_prerequisite(prerequisite), statements(std::move(statements)) + { + } + + expression& conditional_statements::prerequisite() + { + return *m_prerequisite; + } + + conditional_statements::~conditional_statements() + { + delete m_prerequisite; + for (auto statement : statements) + { + delete statement; + } + } + + return_statement::return_statement(const struct position position, expression *return_expression) + : node(position), m_return_expression(return_expression) + { + } + + void return_statement::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + expression& return_statement::return_expression() + { + return *m_return_expression; + } + + return_statement::~return_statement() + { + delete m_return_expression; + } + + case_statement::case_statement(const struct position position, + expression *condition, std::vector&& cases, std::vector *alternative) + : node(position), m_condition(condition), cases(std::move(cases)), alternative(alternative) + { + } + + void case_statement::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + expression& case_statement::condition() + { + return *m_condition; + } + + assign_statement::assign_statement(const struct position position, designator_expression *lvalue, + expression *rvalue) + : node(position), m_lvalue(lvalue), m_rvalue(rvalue) + { + } + + void assign_statement::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + variable_expression *designator_expression::is_variable() + { + return nullptr; + } + + array_access_expression *designator_expression::is_array_access() + { + return nullptr; + } + + field_access_expression *designator_expression::is_field_access() + { + return nullptr; + } + + dereference_expression *designator_expression::is_dereference() + { + return nullptr; + } + + designator_expression& assign_statement::lvalue() + { + return *m_lvalue; + } + + expression& assign_statement::rvalue() + { + return *m_rvalue; + } + + assign_statement::~assign_statement() + { + delete m_rvalue; + } + + if_statement::if_statement(const struct position position, conditional_statements *body, + std::vector&& branches, + std::vector *alternative) + : node(position), m_body(body), branches(std::move(branches)), alternative(alternative) + { + } + + void if_statement::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + conditional_statements& if_statement::body() + { + return *m_body; + } + + if_statement::~if_statement() + { + delete m_body; + for (const auto branch : branches) + { + delete branch; + } + delete this->alternative; + } + + import_declaration::import_declaration(const struct position position, std::vector&& segments) + : node(position), segments(std::move(segments)) + { + } + + void import_declaration::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + while_statement::while_statement(const struct position position, conditional_statements *body, + std::vector&& branches) + : node(position), m_body(body), branches(std::move(branches)) + { + } + + void while_statement::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + conditional_statements& while_statement::body() + { + return *m_body; + } + + while_statement::~while_statement() + { + delete m_body; + for (const auto branch : branches) + { + delete branch; + } + } + + const char *print_binary_operator(const binary_operator operation) + { + switch (operation) + { + case binary_operator::sum: + return "+"; + case binary_operator::subtraction: + return "-"; + case binary_operator::multiplication: + return "*"; + case binary_operator::division: + return "/"; + case binary_operator::remainder: + return "%"; + case binary_operator::equals: + return "="; + case binary_operator::not_equals: + return "<>"; + case binary_operator::less: + return "<"; + case binary_operator::less_equal: + return "<="; + case binary_operator::greater: + return ">"; + case binary_operator::greater_equal: + return ">="; + case binary_operator::conjunction: + return "and"; + case binary_operator::disjunction: + return "or"; + case binary_operator::exclusive_disjunction: + return "xor"; + case binary_operator::shift_left: + return "<<"; + case binary_operator::shift_right: + return ">>"; + } + __builtin_unreachable(); + }; +} diff --git a/frontend/dependency.cc b/frontend/dependency.cc new file mode 100644 index 0000000..25658f8 --- /dev/null +++ b/frontend/dependency.cc @@ -0,0 +1,102 @@ +/* Dependency graph analysis. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "elna/frontend/dependency.h" + +#include +#include +#include + +#include "elna/frontend/driver.h" +#include "elna/frontend/semantic.h" +#include "parser.hh" + +namespace elna::frontend +{ + dependency::dependency(const char *path) + : error_container(path) + { + } + + dependency read_source(std::istream& entry_point, const char *entry_path) + { + driver parse_driver{ entry_path }; + lexer tokenizer(entry_point); + yy::parser parser(tokenizer, parse_driver); + + dependency outcome{ entry_path }; + if (parser()) + { + std::swap(outcome.errors(), parse_driver.errors()); + return outcome; + } + else + { + std::swap(outcome.tree, parse_driver.tree); + } + declaration_visitor declaration_visitor(entry_path); + outcome.tree->accept(&declaration_visitor); + + if (!declaration_visitor.errors().empty()) + { + std::swap(outcome.errors(), declaration_visitor.errors()); + } + outcome.unresolved = declaration_visitor.unresolved; + + return outcome; + } + + error_list analyze_semantics(const char *path, std::unique_ptr& tree, symbol_bag bag) + { + name_analysis_visitor name_analyser(path, bag); + tree->accept(&name_analyser); + + if (name_analyser.has_errors()) + { + return std::move(name_analyser.errors()); + } + type_analysis_visitor type_analyzer(path, bag); + tree->accept(&type_analyzer); + + if (type_analyzer.has_errors()) + { + return std::move(type_analyzer.errors()); + } + return error_list{}; + } + + std::filesystem::path build_path(const std::vector& segments) + { + std::filesystem::path result; + std::vector::const_iterator segment_iterator = std::cbegin(segments); + + if (segment_iterator == std::cend(segments)) + { + return result; + } + result = *segment_iterator; + + ++segment_iterator; + for (; segment_iterator != std::cend(segments); ++segment_iterator) + { + result /= *segment_iterator; + } + result.replace_extension(".elna"); + + return result; + } +} diff --git a/frontend/driver.cc b/frontend/driver.cc new file mode 100644 index 0000000..1c20d09 --- /dev/null +++ b/frontend/driver.cc @@ -0,0 +1,124 @@ +/* Parsing driver. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "elna/frontend/driver.h" + +namespace elna::frontend +{ + position make_position(const yy::location& location) + { + position result; + result.line = static_cast(location.begin.line); + result.column = static_cast(location.begin.column); + + return result; + } + + syntax_error::syntax_error(const std::string& message, + const char *input_file, const yy::location& location) + : error(input_file, make_position(location)), message(message) + { + } + + std::string syntax_error::what() const + { + return message; + } + + driver::driver(const char *input_file) + : error_container(input_file) + { + } + + char escape_char(char escape) + { + switch (escape) + { + case 'n': + return '\n'; + case 'a': + return '\a'; + case 'b': + return '\b'; + case 't': + return '\t'; + case 'f': + return '\f'; + case 'r': + return '\r'; + case 'v': + return '\v'; + case '\\': + return '\\'; + case '\'': + return '\''; + case '"': + return '"'; + case '?': + return '\?'; + case '0': + return '\0'; + default: + return escape_invalid_char; + } + } + + std::optional escape_string(const char *escape) + { + std::string result; + const char *current_position = escape + 1; + + while (*current_position != '\0') + { + if (*current_position == '\\' && *(current_position + 1) == 'x') + { + current_position += 2; + + std::size_t processed; + char character = static_cast(std::stoi(current_position, &processed, 16)); + if (processed == 0) + { + return std::nullopt; + } + else + { + current_position += processed - 1; + result.push_back(character); + } + } + else if (*current_position == '\\') + { + ++current_position; + + char escape = escape_char(*current_position); + if (escape == escape_invalid_char) + { + return std::nullopt; + } + result.push_back(escape); + } + else + { + result.push_back(*current_position); + } + ++current_position; + } + result.pop_back(); // Remove the terminating quote character. + + return result; + } +} diff --git a/frontend/lexer.ll b/frontend/lexer.ll new file mode 100644 index 0000000..f14497b --- /dev/null +++ b/frontend/lexer.ll @@ -0,0 +1,320 @@ +/* Lexical analyzer. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +%{ +#define YY_NO_UNISTD_H +#define YY_USER_ACTION this->location.columns(yyleng); + +#include +#include "parser.hh" + +#undef YY_DECL +#define YY_DECL yy::parser::symbol_type elna::frontend::lexer::lex(driver& driver) +#define yyterminate() return yy::parser::make_YYEOF(this->location) +%} + +%option c++ noyywrap never-interactive +%option yyclass="lexer" + +%x IN_COMMENT + +ID1 [A-Za-z_] +ID2 [A-Za-z0-9_] +HIGIT [0-9a-fA-F] +BIGIT [01] + +%% +%{ + this->location.step(); +%} + +{ + \*\) BEGIN(INITIAL); + [^*\n]+ ; /* Eat comment in chunks. */ + \* ; /* Eat the lone star. */ + \n+ { + this->location.lines(yyleng); + this->location.step(); + } +} +\(\* BEGIN(IN_COMMENT); +[ \t\r] { + this->location.step(); +} +\n+ { + this->location.lines(yyleng); +} +if { + return yy::parser::make_IF(this->location); +} +then { + return yy::parser::make_THEN(this->location); +} +else { + return yy::parser::make_ELSE(this->location); +} +elsif { + return yy::parser::make_ELSIF(this->location); +} +while { + return yy::parser::make_WHILE(this->location); +} +do { + return yy::parser::make_DO(this->location); +} +proc { + return yy::parser::make_PROCEDURE(this->location); +} +begin { + return yy::parser::make_BEGIN_BLOCK(this->location); + } +end { + return yy::parser::make_END_BLOCK(this->location); +} +extern { + return yy::parser::make_EXTERN(this->location); +} +const { + return yy::parser::make_CONST(this->location); +} +var { + return yy::parser::make_VAR(this->location); +} +type { + return yy::parser::make_TYPE(this->location); +} +record { + return yy::parser::make_RECORD(this->location); +} +union { + return yy::parser::make_UNION(this->location); +} +true { + return yy::parser::make_BOOLEAN(true, this->location); +} +false { + return yy::parser::make_BOOLEAN(false, this->location); +} +nil { + return yy::parser::make_NIL(this->location); +} +\& { + return yy::parser::make_AND(this->location); +} +xor { + return yy::parser::make_XOR(this->location); +} +or { + return yy::parser::make_OR(this->location); +} +\| { + return yy::parser::make_PIPE(this->location); +} +\~ { + return yy::parser::make_NOT(this->location); +} +return { + return yy::parser::make_RETURN(this->location); +} +module { + return yy::parser::make_MODULE(this->location); +} +program { + return yy::parser::make_PROGRAM(this->location); +} +import { + return yy::parser::make_IMPORT(this->location); +} +cast { + return yy::parser::make_CAST(this->location); +} +defer { + return yy::parser::make_DEFER(this->location); +} +case { + return yy::parser::make_CASE(this->location); +} +of { + return yy::parser::make_OF(this->location); +} +{ID1}{ID2}* { + return yy::parser::make_IDENTIFIER(yytext, this->location); +} +#{ID1}{ID2}* { + return yy::parser::make_TRAIT(yytext + 1, this->location); +} +[[:digit:]]+u { + unsigned long result = strtoul(yytext, NULL, 10); + + if (errno == ERANGE) + { + REJECT; + } + else + { + return yy::parser::make_WORD(result, this->location); + } +} +[[:digit:]]+ { + long result = strtol(yytext, NULL, 10); + + if (errno == ERANGE) + { + REJECT; + } + else + { + return yy::parser::make_INTEGER(result, this->location); + } +} +0x{HIGIT}+ { + unsigned long result = strtoul(yytext, NULL, 16); + + if (errno == ERANGE) + { + REJECT; + } + else + { + return yy::parser::make_WORD(result, this->location); + } +} +0b{BIGIT}+ { + unsigned long result = strtoul(yytext, NULL, 2); + + if (errno == ERANGE) + { + REJECT; + } + else + { + return yy::parser::make_WORD(result, this->location); + } +} +[[:digit:]]+\.[[:digit:]]+ { + float result = strtof(yytext, NULL); + + if (errno == ERANGE) + { + REJECT; + } + else + { + return yy::parser::make_FLOAT(result, this->location); + } +} +'[[:print:]]+' { + std::optional result = escape_string(yytext); + if (!result.has_value() || result.value().size() != 1) + { + REJECT; + } + return yy::parser::make_CHARACTER(result.value(), this->location); +} +\"[[:print:]]*\" { + std::optional result = escape_string(yytext); + if (!result.has_value()) + { + REJECT; + } + return yy::parser::make_STRING(result.value(), this->location); +} +\( { + return yy::parser::make_LEFT_PAREN(this->location); +} +\) { + return yy::parser::make_RIGHT_PAREN(this->location); +} +\[ { + return yy::parser::make_LEFT_SQUARE(this->location); +} +\] { + return yy::parser::make_RIGHT_SQUARE(this->location); +} +\<\< { + return yy::parser::make_SHIFT_LEFT(this->location); +} +\>\> { + return yy::parser::make_SHIFT_RIGHT(this->location); +} +\>= { + return yy::parser::make_GREATER_EQUAL(this->location); +} +\<= { + return yy::parser::make_LESS_EQUAL(this->location); +} +\> { + return yy::parser::make_GREATER_THAN(this->location); +} +\< { + return yy::parser::make_LESS_THAN(this->location); +} +\<\> { + return yy::parser::make_NOT_EQUAL(this->location); +} += { + return yy::parser::make_EQUALS(this->location); +} +; { + return yy::parser::make_SEMICOLON(this->location); +} +\. { + return yy::parser::make_DOT(this->location); +} +, { + return yy::parser::make_COMMA(this->location); +} +\+ { + return yy::parser::make_PLUS(this->location); +} +\-> { + return yy::parser::make_ARROW(this->location); +} +\- { + return yy::parser::make_MINUS(this->location); +} +\* { + return yy::parser::make_MULTIPLICATION(this->location); +} +\/ { + return yy::parser::make_DIVISION(this->location); +} +% { + return yy::parser::make_REMAINDER(this->location); +} +:= { + return yy::parser::make_ASSIGNMENT(this->location); +} +: { + return yy::parser::make_COLON(this->location); +} +\^ { + return yy::parser::make_HAT(this->location); +} +@ { + return yy::parser::make_AT(this->location); +} +! { + return yy::parser::make_EXCLAMATION(this->location); +} +. { + std::stringstream ss; + + ss << "Illegal character 0x" << std::hex << static_cast(yytext[0]); + driver.add_error(ss.str(), driver.input_file, this->location); +} +%% diff --git a/frontend/parser.yy b/frontend/parser.yy new file mode 100644 index 0000000..bace8d7 --- /dev/null +++ b/frontend/parser.yy @@ -0,0 +1,594 @@ +/* Syntax analyzer. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +%require "3.4" +%language "c++" + +%code { + using namespace elna; +} + +%code requires { + #include + #include + #include "elna/frontend/driver.h" + + #if !defined(yyFlexLexerOnce) + #include + #endif + + namespace elna::frontend + { + class lexer; + } +} + +%code provides { + namespace elna::frontend + { + + class lexer: public yyFlexLexer + { + public: + yy::location location; + + lexer(std::istream& arg_yyin) + : yyFlexLexer(&arg_yyin) + { + } + + yy::parser::symbol_type lex(driver& driver); + }; + + } +} + +%define api.token.raw +%define api.token.constructor +%define api.value.type variant + +%parse-param {elna::frontend::lexer& lexer} +%param {elna::frontend::driver& driver} +%locations + +%header + +%code { + #define yylex lexer.lex +} +%start program; + +%token IDENTIFIER +%token TRAIT +%token INTEGER +%token WORD +%token FLOAT +%token CHARACTER +%token STRING +%token BOOLEAN +%token LEFT_PAREN "(" RIGHT_PAREN ")" LEFT_SQUARE "[" RIGHT_SQUARE "]" +%token ASSIGNMENT ":=" + ARROW "->" EXCLAMATION "!" + AT "@" HAT "^" + COLON ":" SEMICOLON ";" DOT "." COMMA "," +%token NOT "~" + CAST "cast" + NIL "nil" + CONST "const" + VAR "var" + PROCEDURE "proc" + TYPE "type" + RECORD "record" + UNION "union" + EXTERN "extern" + IF "if" + WHILE "while" + DO "do" + THEN "then" + ELSE "else" + ELSIF "elsif" + RETURN "return" + PROGRAM "program" + MODULE "module" + IMPORT "import" + BEGIN_BLOCK "begin" + END_BLOCK "end" + DEFER "defer" + CASE "case" + OF "of" + PIPE "|" +%token OR "or" AND "&" XOR "xor" + EQUALS "=" NOT_EQUAL "<>" LESS_THAN "<" GREATER_THAN ">" LESS_EQUAL "<=" GREATER_EQUAL ">=" + SHIFT_LEFT "<<" SHIFT_RIGHT ">>" + PLUS "+" MINUS "-" + MULTIPLICATION "*" DIVISION "/" REMAINDER "%" + +%left "or" "&" "xor" +%left "=" "<>" "<" ">" "<=" ">=" +%left "<<" ">>" +%left "+" "-" +%left "*" "/" "%" + +%type literal; +%type > case_labels; +%type switch_case; +%type > switch_cases; +%type constant_declaration; +%type > constant_part constant_declarations; +%type variable_declaration; +%type > variable_declarations variable_part; +%type type_expression; +%type > type_expressions; +%type traits_expression; +%type expression operand simple_expression; +%type unary_expression; +%type binary_expression; +%type > expressions actual_parameter_list; +%type designator_expression; +%type call_expression; +%type return_statement; +%type statement; +%type > required_statements optional_statements statement_part; +%type procedure_declaration; +%type , elna::frontend::procedure_type_expression *>> procedure_heading; +%type return_declaration; +%type > procedure_declarations procedure_part; +%type type_declaration; +%type > type_declarations type_part; +%type > block; +%type field_declaration formal_parameter; +%type >> + optional_fields required_fields formal_parameters formal_parameter_list; +%type > elsif_then_statements elsif_do_statements; +%type *> else_statements; +%type cast_expression; +%type identifier_definition; +%type > identifier_definitions; +%type > identifiers import_declaration; +%type > import_declarations import_part; +%% +program: + "program" ";" import_part constant_part type_part variable_part procedure_part statement_part "end" "." + { + auto tree = new frontend::program(frontend::make_position(@1)); + + std::swap(tree->imports, $3); + std::swap(tree->constants, $4); + std::swap(tree->types , $5); + std::swap(tree->variables, $6); + std::swap(tree->procedures, $7); + std::swap(tree->body, $8); + + driver.tree.reset(tree); + } + | "module" ";" import_part constant_part type_part variable_part procedure_part "end" "." + { + auto tree = new frontend::unit(frontend::make_position(@1)); + + std::swap(tree->imports, $3); + std::swap(tree->constants, $4); + std::swap(tree->types , $5); + std::swap(tree->variables, $6); + std::swap(tree->procedures, $7); + + driver.tree.reset(tree); + } +block: constant_part variable_part statement_part "end" + { + $$ = std::make_unique(std::move($1), std::move($2), std::move($3)); + } +statement_part: + /* no statements */ {} + | "begin" required_statements { std::swap($$, $2); } + | return_statement { $$.push_back($1); } + | "begin" required_statements ";" return_statement + { + std::swap($$, $2); + $$.push_back($4); + } +identifier_definition: + IDENTIFIER "*" { $$ = frontend::identifier_definition{ $1, true }; } + | IDENTIFIER { $$ = frontend::identifier_definition{ $1, false }; } +identifier_definitions: + identifier_definition "," identifier_definitions + { + std::swap($$, $3); + $$.emplace($$.cbegin(), $1); + } + | identifier_definition { $$.emplace_back(std::move($1)); } +return_declaration: + /* proper procedure */ {} + | "->" "!" { $$ = frontend::procedure_type_expression::return_t(std::monostate{}); } + | "->" type_expression { $$ = frontend::procedure_type_expression::return_t($2); } +procedure_heading: formal_parameter_list return_declaration + { + $$.second = new frontend::procedure_type_expression(frontend::make_position(@1), std::move($2)); + for (auto& [name, type] : $1) + { + $$.first.emplace_back(std::move(name)); + $$.second->parameters.push_back(type); + } + } +procedure_declaration: + "proc" identifier_definition procedure_heading ";" block ";" + { + $$ = new frontend::procedure_declaration(frontend::make_position(@1), std::move($2), $3.second, std::move(*$5)); + std::swap($3.first, $$->parameter_names); + } + | "proc" identifier_definition procedure_heading ";" "extern" ";" + { + $$ = new frontend::procedure_declaration(frontend::make_position(@1), std::move($2), $3.second); + std::swap($3.first, $$->parameter_names); + } +procedure_declarations: + procedure_declaration procedure_declarations + { + std::swap($$, $2); + $$.emplace($$.cbegin(), std::move($1)); + } + | procedure_declaration { $$.emplace_back(std::move($1)); } +procedure_part: + /* no procedure definitions */ {} + | procedure_declarations { std::swap($$, $1); } +call_expression: designator_expression actual_parameter_list + { + $$ = new frontend::procedure_call(frontend::make_position(@1), $1); + std::swap($$->arguments, $2); + } +cast_expression: "cast" "(" expression ":" type_expression ")" + { $$ = new frontend::cast_expression(frontend::make_position(@1), $5, $3); } +elsif_do_statements: + "elsif" expression "do" optional_statements elsif_do_statements + { + frontend::conditional_statements *branch = new frontend::conditional_statements($2, std::move($4)); + std::swap($5, $$); + $$.emplace($$.begin(), branch); + } + | {} +else_statements: + "else" optional_statements { $$ = new std::vector(std::move($2)); } + | { $$ = nullptr; } +elsif_then_statements: + "elsif" expression "then" optional_statements elsif_then_statements + { + frontend::conditional_statements *branch = new frontend::conditional_statements($2, std::move($4)); + std::swap($5, $$); + $$.emplace($$.begin(), branch); + } + | {} +return_statement: "return" expression + { $$ = new frontend::return_statement(frontend::make_position(@1), $2); } +literal: + INTEGER { $$ = new frontend::literal(frontend::make_position(@1), $1); } + | WORD { $$ = new frontend::literal(frontend::make_position(@1), $1); } + | FLOAT { $$ = new frontend::literal(frontend::make_position(@1), $1); } + | BOOLEAN { $$ = new frontend::literal(frontend::make_position(@1), $1); } + | CHARACTER { $$ = new frontend::literal(frontend::make_position(@1), $1.at(0)); } + | "nil" { $$ = new frontend::literal(frontend::make_position(@1), nullptr); } + | STRING { $$ = new frontend::literal(frontend::make_position(@1), $1); } +traits_expression: + TRAIT "(" type_expressions ")" + { + $$ = new frontend::traits_expression(frontend::make_position(@1), $1); + std::swap($3, $$->parameters); + } +simple_expression: + literal { $$ = $1; } + | designator_expression { $$ = $1; } + | traits_expression { $$ = $1; } + | cast_expression { $$ = $1; } + | call_expression { $$ = $1; } + | "(" expression ")" { $$ = $2; } +operand: + unary_expression { $$ = $1; } + | simple_expression { $$ = $1; } +expression: + binary_expression { $$ = $1; } + | operand { $$ = $1; } +binary_expression: + expression "*" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::multiplication); + } + | expression "/" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::division); + } + | expression "%" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::remainder); + } + | expression "+" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::sum); + } + | expression "-" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::subtraction); + } + | expression "=" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::equals); + } + | expression "<>" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::not_equals); + } + | expression "<" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::less); + } + | expression ">" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::greater); + } + | expression "<=" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, + frontend::binary_operator::less_equal); + } + | expression ">=" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::greater_equal); + } + | expression "&" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::conjunction); + } + | expression "or" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::disjunction); + } + | expression "xor" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, + frontend::binary_operator::exclusive_disjunction); + } + | expression "<<" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::shift_left); + } + | expression ">>" expression + { + $$ = new frontend::binary_expression(frontend::make_position(@2), $1, $3, frontend::binary_operator::shift_right); + } +unary_expression: + "@" operand + { + $$ = new frontend::unary_expression(frontend::make_position(@1), $2, frontend::unary_operator::reference); + } + | "~" operand + { + $$ = new frontend::unary_expression(frontend::make_position(@1), $2, frontend::unary_operator::negation); + } + | "-" operand + { + $$ = new frontend::unary_expression(frontend::make_position(@1), $2, frontend::unary_operator::minus); + } +expressions: + expression "," expressions + { + std::swap($$, $3); + $$.emplace($$.cbegin(), $1); + } + | expression { $$.push_back($1); } +type_expressions: + type_expression "," type_expressions + { + std::swap($$, $3); + $$.emplace($$.cbegin(), $1); + } + | type_expression { $$.push_back($1); } +designator_expression: + simple_expression "[" expression "]" + { $$ = new frontend::array_access_expression(frontend::make_position(@2), $1, $3); } + | simple_expression "." IDENTIFIER + { $$ = new frontend::field_access_expression(frontend::make_position(@2), $1, $3); } + | simple_expression "^" + { $$ = new frontend::dereference_expression(frontend::make_position(@1), $1); } + | IDENTIFIER + { $$ = new frontend::variable_expression(frontend::make_position(@1), $1); } +statement: + designator_expression ":=" expression + { $$ = new frontend::assign_statement(frontend::make_position(@1), $1, $3); } + | "while" expression "do" optional_statements elsif_do_statements "end" + { + frontend::conditional_statements *body = new frontend::conditional_statements($2, std::move($4)); + $$ = new frontend::while_statement(frontend::make_position(@1), body, std::move($5)); + } + | "if" expression "then" optional_statements elsif_then_statements else_statements "end" + { + frontend::conditional_statements *then = new frontend::conditional_statements($2, std::move($4)); + $$ = new frontend::if_statement(frontend::make_position(@1), then, std::move($5), $6); + } + | call_expression { $$ = $1; } + | "defer" optional_statements "end" + { $$ = new frontend::defer_statement(frontend::make_position(@1), std::move($2)); } + | "case" expression "of" switch_cases else_statements "end" + { $$ = new frontend::case_statement(frontend::make_position(@1), $2, std::move($4), $5); } +switch_case: case_labels ":" optional_statements + { $$ = { .labels = std::move($1), .statements = std::move($3) }; } +switch_cases: + switch_case "|" switch_cases + { + std::swap($$, $3); + $$.emplace($$.cbegin(), $1); + } + | switch_case { $$.push_back($1); } +case_labels: + expression "," case_labels + { + std::swap($$, $3); + $$.emplace($$.cbegin(), $1); + } + | expression { $$.push_back($1); } +required_statements: + required_statements ";" statement + { + std::swap($$, $1); + $$.insert($$.cend(), $3); + } + | statement { $$.push_back($1); } +optional_statements: + required_statements { std::swap($$, $1); } + | /* no statements */ {} +field_declaration: + IDENTIFIER ":" type_expression { $$ = std::make_pair($1, $3); } +required_fields: + field_declaration ";" required_fields + { + std::swap($$, $3); + $$.emplace($$.cbegin(), $1); + } + | field_declaration { $$.emplace_back($1); } +optional_fields: + required_fields { std::swap($$, $1); } + | /* no fields */ {} +type_expression: + "[" INTEGER "]" type_expression + { + $$ = new frontend::array_type_expression(frontend::make_position(@1), $4, $2); + } + | "^" type_expression + { + $$ = new frontend::pointer_type_expression(frontend::make_position(@1), $2); + } + | "record" optional_fields "end" + { + $$ = new frontend::record_type_expression(frontend::make_position(@1), std::move($2)); + } + | "union" required_fields "end" + { + $$ = new frontend::union_type_expression(frontend::make_position(@1), std::move($2)); + } + | "proc" "(" type_expressions ")" return_declaration + { + auto result = new frontend::procedure_type_expression(frontend::make_position(@1), std::move($5)); + std::swap(result->parameters, $3); + $$ = result; + } + | "(" identifiers ")" + { + $$ = new frontend::enumeration_type_expression(frontend::make_position(@1), std::move($2)); + } + | IDENTIFIER + { + $$ = new frontend::named_type_expression(frontend::make_position(@1), $1); + } +identifiers: + IDENTIFIER "," identifiers + { + std::swap($$, $3); + $$.emplace($$.cbegin(), std::move($1)); + } + | IDENTIFIER { $$.emplace_back(std::move($1)); } +variable_declaration: + identifier_definitions ":" type_expression ";" + { + std::shared_ptr shared_type{ $3 }; + $$ = new frontend::variable_declaration( frontend::make_position(@2), std::move($1), shared_type); + } + | identifier_definitions ":" type_expression ":=" "extern" ";" + { + std::shared_ptr shared_type{ $3 }; + $$ = new frontend::variable_declaration( frontend::make_position(@2), std::move($1), shared_type, + std::monostate{}); + } + | identifier_definitions ":" type_expression ":=" expression ";" + { + std::shared_ptr shared_type{ $3 }; + $$ = new frontend::variable_declaration( frontend::make_position(@2), std::move($1), shared_type, $5); + } +variable_declarations: + /* no variable declarations */ {} + | variable_declaration variable_declarations + { + std::swap($$, $2); + $$.insert(std::cbegin($$), $1); + } +variable_part: + /* no variable declarations */ {} + | "var" variable_declarations { std::swap($$, $2); } +constant_declaration: identifier_definition ":=" expression ";" + { + $$ = new frontend::constant_declaration(frontend::make_position(@1), std::move($1), $3); + } +constant_declarations: + constant_declaration constant_declarations + { + std::swap($$, $2); + $$.insert(std::cbegin($$), $1); + } + | /* no constant definitions */ {} +constant_part: + /* no constant definitions */ {} + | "const" constant_declarations { std::swap($$, $2); } +import_declaration: + IDENTIFIER "." import_declaration + { + std::swap($$, $3); + $$.emplace($$.cbegin(), std::move($1)); + } + | IDENTIFIER { $$.emplace_back(std::move($1)); } +import_declarations: + import_declaration "," import_declarations + { + std::swap($$, $3); + $$.emplace($$.cbegin(), new frontend::import_declaration(frontend::make_position(@1), std::move($1))); + } + | import_declaration + { + $$.emplace_back(new frontend::import_declaration(frontend::make_position(@1), std::move($1))); + } +import_part: + /* no import declarations */ {} + | "import" import_declarations ";" { std::swap($$, $2); } +type_declaration: identifier_definition "=" type_expression ";" + { + $$ = new frontend::type_declaration(frontend::make_position(@1), std::move($1), $3); + } +type_declarations: + type_declaration type_declarations + { + std::swap($$, $2); + $$.insert($$.cbegin(), $1); + } + | /* no type definitions */ {} +type_part: + /* no type definitions */ {} + | "type" type_declarations { std::swap($$, $2); } +formal_parameter: + IDENTIFIER ":" type_expression { $$ = std::make_pair($1, $3); } +formal_parameter_list: + "(" ")" {} + | "(" formal_parameters ")" { std::swap($$, $2); } +formal_parameters: + formal_parameter "," formal_parameters + { + std::swap($$, $3); + $$.emplace($$.cbegin(), std::move($1)); + } + | formal_parameter { $$.emplace_back(std::move($1)); } +actual_parameter_list: + "(" ")" {} + | "(" expressions ")" { std::swap($$, $2); } +%% + +void yy::parser::error(const location_type& loc, const std::string& message) +{ + driver.add_error(message, driver.input_file, loc); +} diff --git a/frontend/result.cc b/frontend/result.cc new file mode 100644 index 0000000..aca9c5e --- /dev/null +++ b/frontend/result.cc @@ -0,0 +1,67 @@ +/* Miscellaneous types used across stage boundaries. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "elna/frontend/result.h" + +namespace elna::frontend +{ + error::error(const char *path, const struct position position) + : position(position), path(path) + { + } + + std::size_t error::line() const noexcept + { + return this->position.line; + } + + std::size_t error::column() const noexcept + { + return this->position.column; + } + + error_container::error_container(const char *input_file) + : input_file(input_file) + { + } + + std::deque>& error_container::errors() + { + return m_errors; + } + + bool error_container::has_errors() const + { + return !m_errors.empty(); + } + + bool identifier_definition::operator==(const identifier_definition& that) const + { + return *this == that.name; + } + + bool identifier_definition::operator==(const std::string& that) const + { + return this->name == that; + } +} + +std::size_t std::hash::operator()( + const elna::frontend::identifier_definition& key) const +{ + return std::hash{}(key.name); +} diff --git a/frontend/semantic.cc b/frontend/semantic.cc new file mode 100644 index 0000000..36c75b8 --- /dev/null +++ b/frontend/semantic.cc @@ -0,0 +1,644 @@ +/* Name analysis. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "elna/frontend/semantic.h" + +#include +#include + +namespace elna::frontend +{ + undeclared_error::undeclared_error(const std::string& identifier, const char *path, const struct position position) + : error(path, position), identifier(identifier) + { + } + + std::string undeclared_error::what() const + { + return "Type '" + identifier + "' not declared"; + } + + already_declared_error::already_declared_error(const std::string& identifier, + const char *path, const struct position position) + : error(path, position), identifier(identifier) + { + } + + std::string already_declared_error::what() const + { + return "Symbol '" + identifier + "' has been already declared"; + } + + field_duplication_error::field_duplication_error(const std::string& field_name, + const char *path, const struct position position) + : error(path, position), field_name(field_name) + { + } + + std::string field_duplication_error::what() const + { + return "Repeated field name '" + field_name + "'"; + } + + cyclic_declaration_error::cyclic_declaration_error(const std::vector& cycle, + const char *path, const struct position position) + : error(path, position), cycle(cycle) + { + } + + std::string cyclic_declaration_error::what() const + { + auto segment = std::cbegin(this->cycle); + std::string message = "Type declaration forms a cycle: " + *segment; + + ++segment; + for (; segment != std::cend(this->cycle); ++segment) + { + message += " -> " + *segment; + } + return message; + } + + return_error::return_error(const std::string& identifier, const char *path, const struct position position) + : error(path, position), identifier(identifier) + { + } + + std::string return_error::what() const + { + return "Procedure '" + identifier + "' is expected to return, but does not have a return statement"; + } + + variable_initializer_error::variable_initializer_error(const char *path, const struct position position) + : error(path, position) + { + } + + std::string variable_initializer_error::what() const + { + return "Only one variable can be initialized"; + } + + type_analysis_visitor::type_analysis_visitor(const char *path, symbol_bag bag) + : error_container(path), bag(bag) + { + } + + void type_analysis_visitor::visit(program *program) + { + visit(static_cast(program)); + } + + void type_analysis_visitor::visit(procedure_declaration *definition) + { + if (definition->body.has_value() && definition->heading().return_type.proper_type != nullptr) + { + for (statement *const statement : definition->body.value().body()) + { + statement->accept(this); + } + if (!this->returns) + { + add_error(definition->identifier.name, this->input_file, definition->position()); + } + } + } + + void type_analysis_visitor::visit(assign_statement *) + { + } + + void type_analysis_visitor::visit(if_statement *) + { + } + + void type_analysis_visitor::visit(while_statement *) + { + } + + void type_analysis_visitor::visit(return_statement *) + { + this->returns = true; + } + + void type_analysis_visitor::visit(defer_statement *) + { + } + + void type_analysis_visitor::visit(case_statement *) + { + } + + void type_analysis_visitor::visit(procedure_call *) + { + } + + bool type_analysis_visitor::check_unresolved_symbol(std::shared_ptr alias, + std::vector& alias_path) + { + if (std::find(std::cbegin(alias_path), std::cend(alias_path), alias->name) != std::cend(alias_path)) + { + return false; + } + alias_path.push_back(alias->name); + + if (auto another_alias = alias->reference.get()) + { + return check_unresolved_symbol(another_alias, alias_path); + } + return true; + } + + void type_analysis_visitor::visit(unit *unit) + { + for (type_declaration *const type : unit->types) + { + type->accept(this); + } + for (procedure_declaration *const procedure : unit->procedures) + { + this->returns = false; + procedure->accept(this); + } + } + + void type_analysis_visitor::visit(type_declaration *definition) + { + std::vector alias_path; + auto unresolved_type = this->bag.lookup(definition->identifier.name)->is_type()->symbol.get(); + + if (!check_unresolved_symbol(unresolved_type, alias_path)) + { + add_error(alias_path, this->input_file, definition->position()); + } + } + + name_analysis_visitor::name_analysis_visitor(const char *path, symbol_bag bag) + : error_container(path), bag(bag) + { + } + + procedure_type name_analysis_visitor::build_procedure(procedure_type_expression& type_expression) + { + procedure_type::return_t result_return; + + if (type_expression.return_type.no_return) + { + result_return = procedure_type::return_t(std::monostate{}); + } + else if (type_expression.return_type.proper_type != nullptr) + { + type_expression.return_type.proper_type->accept(this); + result_return = procedure_type::return_t(this->current_type); + } + else + { + result_return = procedure_type::return_t(); + } + procedure_type result_type = procedure_type(result_return); + + for (struct type_expression *parameter : type_expression.parameters) + { + parameter->accept(this); + result_type.parameters.push_back(this->current_type); + } + return result_type; + } + + void name_analysis_visitor::visit(program *program) + { + visit(static_cast(program)); + + for (statement *const statement : program->body) + { + statement->accept(this); + } + } + + void name_analysis_visitor::visit(type_declaration *definition) + { + definition->body().accept(this); + auto resolved = this->bag.resolve(definition->identifier.name, this->current_type); + auto info = std::make_shared(type(resolved)); + + info->exported = definition->identifier.exported; + this->bag.enter(definition->identifier.name, info); + } + + void name_analysis_visitor::visit(named_type_expression *type_expression) + { + auto unresolved_alias = this->bag.declared(type_expression->name); + + if (unresolved_alias != nullptr) + { + this->current_type = type(unresolved_alias); + } + else if (auto from_symbol_table = this->bag.lookup(type_expression->name)) + { + this->current_type = from_symbol_table->is_type()->symbol; + } + else + { + add_error(type_expression->name, this->input_file, type_expression->position()); + this->current_type = type(); + } + } + + void name_analysis_visitor::visit(pointer_type_expression *type_expression) + { + type_expression->base().accept(this); + this->current_type = type(std::make_shared(this->current_type)); + } + + void name_analysis_visitor::visit(array_type_expression *type_expression) + { + type_expression->base().accept(this); + this->current_type = type(std::make_shared(this->current_type, type_expression->size)); + } + + std::vector name_analysis_visitor::build_composite_type(const std::vector& fields) + { + std::vector result; + std::set field_names; + + for (auto& field : fields) + { + if (field_names.find(field.first) != field_names.cend()) + { + add_error(field.first, this->input_file, field.second->position()); + } + else + { + field_names.insert(field.first); + field.second->accept(this); + result.push_back(std::make_pair(field.first, this->current_type)); + } + } + return result; + } + + void name_analysis_visitor::visit(record_type_expression *type_expression) + { + auto result_type = std::make_shared(); + + result_type->fields = build_composite_type(type_expression->fields); + + this->current_type = type(result_type); + } + + void name_analysis_visitor::visit(union_type_expression *type_expression) + { + auto result_type = std::make_shared(); + + result_type->fields = build_composite_type(type_expression->fields); + + this->current_type = type(result_type); + } + + void name_analysis_visitor::visit(procedure_type_expression *type_expression) + { + std::shared_ptr result_type = + std::make_shared(std::move(build_procedure(*type_expression))); + + this->current_type = type(result_type); + } + + void name_analysis_visitor::visit(enumeration_type_expression *type_expression) + { + std::shared_ptr result_type = std::make_shared(type_expression->members); + + this->current_type = type(result_type); + } + + void name_analysis_visitor::visit(variable_declaration *declaration) + { + declaration->variable_type().accept(this); + + for (const auto& variable_identifier : declaration->identifiers) + { + auto variable_symbol = std::make_shared(this->current_type, declaration->is_extern); + + variable_symbol->exported = variable_identifier.exported; + if (!this->bag.enter(variable_identifier.name, variable_symbol)) + { + add_error(variable_identifier.name, this->input_file, + declaration->position()); + } + } + } + + void name_analysis_visitor::visit(constant_declaration *definition) + { + definition->body().accept(this); + auto constant_symbol = std::make_shared(this->current_literal); + + constant_symbol->exported = definition->identifier.exported; + this->bag.enter(definition->identifier.name, constant_symbol); + } + + void name_analysis_visitor::visit(procedure_declaration *definition) + { + std::shared_ptr info; + auto heading = build_procedure(definition->heading()); + + if (definition->body.has_value()) + { + info = std::make_shared(heading, definition->parameter_names, this->bag.enter()); + + for (constant_declaration *const constant : definition->body.value().constants()) + { + constant->accept(this); + } + for (variable_declaration *const variable : definition->body.value().variables()) + { + variable->accept(this); + } + for (statement *const statement : definition->body.value().body()) + { + statement->accept(this); + } + this->bag.leave(); + } + else + { + info = std::make_shared(heading, definition->parameter_names); + } + info->exported = definition->identifier.exported; + this->bag.enter(definition->identifier.name, info); + } + + void name_analysis_visitor::visit(assign_statement *statement) + { + statement->lvalue().accept(this); + statement->rvalue().accept(this); + } + + void name_analysis_visitor::visit(if_statement *statement) + { + statement->body().prerequisite().accept(this); + for (struct statement *const statement : statement->body().statements) + { + statement->accept(this); + } + for (const auto branch : statement->branches) + { + branch->prerequisite().accept(this); + + for (struct statement *const statement : branch->statements) + { + statement->accept(this); + } + } + if (statement->alternative != nullptr) + { + for (struct statement *const statement : *statement->alternative) + { + statement->accept(this); + } + } + } + + void name_analysis_visitor::visit(import_declaration *) + { + } + + void name_analysis_visitor::visit(while_statement *statement) + { + statement->body().prerequisite().accept(this); + for (struct statement *const statement : statement->body().statements) + { + statement->accept(this); + } + for (const auto branch : statement->branches) + { + branch->prerequisite().accept(this); + + for (struct statement *const statement : branch->statements) + { + statement->accept(this); + } + } + } + + void name_analysis_visitor::visit(return_statement *statement) + { + statement->return_expression().accept(this); + } + + void name_analysis_visitor::visit(defer_statement *statement) + { + for (struct statement *const statement : statement->statements) + { + statement->accept(this); + } + } + + void name_analysis_visitor::visit(case_statement *statement) + { + statement->condition().accept(this); + for (const switch_case& case_block : statement->cases) + { + for (expression *const case_label : case_block.labels) + { + case_label->accept(this); + } + for (struct statement *const statement : case_block.statements) + { + statement->accept(this); + } + } + if (statement->alternative != nullptr) + { + for (struct statement *const statement : *statement->alternative) + { + statement->accept(this); + } + } + } + + void name_analysis_visitor::visit(procedure_call *call) + { + call->callable().accept(this); + for (expression *const argument: call->arguments) + { + argument->accept(this); + } + } + + void name_analysis_visitor::visit(unit *unit) + { + for (type_declaration *const type : unit->types) + { + type->accept(this); + } + for (variable_declaration *const variable : unit->variables) + { + variable->accept(this); + } + for (procedure_declaration *const procedure : unit->procedures) + { + procedure->accept(this); + } + } + + void name_analysis_visitor::visit(traits_expression *trait) + { + if (!trait->parameters.empty()) + { + trait->parameters.front()->accept(this); + trait->types.push_back(this->current_type); + } + } + + void name_analysis_visitor::visit(cast_expression *expression) + { + expression->value().accept(this); + expression->target().accept(this); + expression->expression_type = this->current_type; + } + + void name_analysis_visitor::visit(binary_expression *expression) + { + expression->lhs().accept(this); + expression->rhs().accept(this); + } + + void name_analysis_visitor::visit(unary_expression *expression) + { + expression->operand().accept(this); + } + + void name_analysis_visitor::visit(variable_expression *) + { + } + + void name_analysis_visitor::visit(array_access_expression *expression) + { + expression->base().accept(this); + expression->index().accept(this); + } + + void name_analysis_visitor::visit(field_access_expression *expression) + { + expression->base().accept(this); + } + + void name_analysis_visitor::visit(dereference_expression *expression) + { + expression->base().accept(this); + } + + void name_analysis_visitor::visit(literal *literal) + { + this->current_literal = literal->value; + } + + void name_analysis_visitor::visit(literal *literal) + { + this->current_literal = literal->value; + } + + void name_analysis_visitor::visit(literal *literal) + { + this->current_literal = literal->value; + } + + void name_analysis_visitor::visit(literal *literal) + { + this->current_literal = literal->value; + } + + void name_analysis_visitor::visit(literal *literal) + { + this->current_literal = literal->value; + } + + void name_analysis_visitor::visit(literal *literal) + { + this->current_literal = literal->value; + } + + void name_analysis_visitor::visit(literal *literal) + { + this->current_literal = literal->value; + } + + declaration_visitor::declaration_visitor(const char *path) + : error_container(path) + { + } + + void declaration_visitor::visit(program *program) + { + visit(static_cast(program)); + } + + void declaration_visitor::visit(import_declaration *) + { + } + + void declaration_visitor::visit(unit *unit) + { + for (import_declaration *const _import : unit->imports) + { + _import->accept(this); + } + for (type_declaration *const type : unit->types) + { + type->accept(this); + } + for (variable_declaration *const variable : unit->variables) + { + variable->accept(this); + } + for (procedure_declaration *const procedure : unit->procedures) + { + procedure->accept(this); + } + } + + void declaration_visitor::visit(type_declaration *definition) + { + const std::string& type_identifier = definition->identifier.name; + + if (!this->unresolved.insert({ type_identifier, std::make_shared(type_identifier) }).second) + { + add_error(definition->identifier.name, this->input_file, + definition->position()); + } + } + + void declaration_visitor::visit(variable_declaration *declaration) + { + if (declaration->has_initializer() && declaration->identifiers.size() > 1) + { + add_error(this->input_file, declaration->position()); + } + } + + void declaration_visitor::visit(procedure_declaration *definition) + { + if (!definition->body.has_value()) + { + return; + } + for (variable_declaration *const variable : definition->body.value().variables()) + { + variable->accept(this); + } + } +} diff --git a/frontend/symbol.cc b/frontend/symbol.cc new file mode 100644 index 0000000..bfecbd4 --- /dev/null +++ b/frontend/symbol.cc @@ -0,0 +1,427 @@ +/* Symbol definitions. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "elna/frontend/symbol.h" + +namespace elna::frontend +{ + type::type() + { + } + + type::type(std::shared_ptr alias) + : tag(type_tag::alias), alias(alias) + { + } + + type::type(std::shared_ptr primitive) + : tag(type_tag::primitive), primitive(primitive) + { + } + + type::type(std::shared_ptr record) + : tag(type_tag::record), record(record) + { + } + + type::type(std::shared_ptr _union) + : tag(type_tag::_union), _union(_union) + { + } + + type::type(std::shared_ptr pointer) + : tag(type_tag::pointer), pointer(pointer) + { + } + + type::type(std::shared_ptr array) + : tag(type_tag::array), array(array) + { + } + + type::type(std::shared_ptr procedure) + : tag(type_tag::procedure), procedure(procedure) + { + } + + type::type(std::shared_ptr enumeration) + : tag(type_tag::enumeration), enumeration(enumeration) + { + } + + void type::copy(const type& other) + { + switch (other.tag) + { + case type_tag::empty: + break; + case type_tag::alias: + new (&alias) std::weak_ptr(other.alias); + break; + case type_tag::primitive: + new (&primitive) std::shared_ptr(other.primitive); + break; + case type_tag::record: + new (&record) std::shared_ptr(other.record); + break; + case type_tag::_union: + new (&_union) std::shared_ptr(other._union); + break; + case type_tag::pointer: + new (&pointer) std::shared_ptr(other.pointer); + break; + case type_tag::array: + new (&array) std::shared_ptr(other.array); + break; + case type_tag::procedure: + new (&procedure) std::shared_ptr(other.procedure); + break; + case type_tag::enumeration: + new (&enumeration) std::shared_ptr(other.enumeration); + break; + } + } + + type::type(const type& other) + : tag(other.tag) + { + copy(other); + } + + void type::move(type&& other) + { + switch (other.tag) + { + case type_tag::empty: + break; + case type_tag::alias: + new (&alias) std::weak_ptr(std::move(other.alias)); + break; + case type_tag::primitive: + new (&primitive) std::shared_ptr(std::move(other.primitive)); + break; + case type_tag::record: + new (&record) std::shared_ptr(std::move(other.record)); + break; + case type_tag::_union: + new (&_union) std::shared_ptr(std::move(other._union)); + break; + case type_tag::pointer: + new (&pointer) std::shared_ptr(std::move(other.pointer)); + break; + case type_tag::array: + new (&array) std::shared_ptr(std::move(other.array)); + break; + case type_tag::procedure: + new (&procedure) std::shared_ptr(std::move(other.procedure)); + break; + case type_tag::enumeration: + new (&enumeration) std::shared_ptr(std::move(other.enumeration)); + break; + } + } + + type& type::operator=(const type& other) + { + this->~type(); + this->tag = other.tag; + copy(other); + return *this; + } + + type::type(type&& other) + : tag(other.tag) + { + move(std::move(other)); + } + + type& type::operator=(type&& other) + { + this->~type(); + this->tag = other.tag; + move(std::move(other)); + return *this; + } + + bool type::operator==(const std::nullptr_t&) + { + return empty(); + } + + type::~type() + { + switch (tag) + { + case type_tag::empty: + break; + case type_tag::alias: + this->alias.~weak_ptr(); + break; + case type_tag::primitive: + this->primitive.~shared_ptr(); + break; + case type_tag::record: + this->record.~shared_ptr(); + break; + case type_tag::_union: + this->_union.~shared_ptr(); + break; + case type_tag::pointer: + this->pointer.~shared_ptr(); + break; + case type_tag::array: + this->array.~shared_ptr(); + break; + case type_tag::procedure: + this->procedure.~shared_ptr(); + break; + case type_tag::enumeration: + this->enumeration.~shared_ptr(); + break; + } + } + + template<> + std::shared_ptr type::get() const + { + return tag == type_tag::alias ? this->alias.lock() : nullptr; + } + + template<> + std::shared_ptr type::get() const + { + return tag == type_tag::primitive ? this->primitive : nullptr; + } + + template<> + std::shared_ptr type::get() const + { + return tag == type_tag::record ? this->record : nullptr; + } + + template<> + std::shared_ptr type::get() const + { + return tag == type_tag::_union ? this->_union : nullptr; + } + + template<> + std::shared_ptr type::get() const + { + return tag == type_tag::pointer ? this->pointer : nullptr; + } + + template<> + std::shared_ptr type::get() const + { + return tag == type_tag::array ? this->array : nullptr; + } + + template<> + std::shared_ptr type::get() const + { + return tag == type_tag::procedure ? this->procedure : nullptr; + } + + template<> + std::shared_ptr type::get() const + { + return tag == type_tag::enumeration ? this->enumeration : nullptr; + } + + bool type::empty() const + { + return tag == type_tag::empty; + } + + alias_type::alias_type(const std::string& name) + : name(name), reference() + { + } + + pointer_type::pointer_type(type base) + : base(base) + { + } + + array_type::array_type(type base, std::uint64_t size) + : base(base), size(size) + { + } + + primitive_type::primitive_type(const std::string& identifier) + : identifier(identifier) + { + } + + procedure_type::procedure_type(return_t return_type) + : return_type(return_type) + { + } + + enumeration_type::enumeration_type(const std::vector& members) + : members(members) + { + } + + info::~info() + { + } + + std::shared_ptr info::is_type() + { + return nullptr; + } + + std::shared_ptr info::is_procedure() + { + return nullptr; + } + + std::shared_ptr info::is_constant() + { + return nullptr; + } + + std::shared_ptr info::is_variable() + { + return nullptr; + } + + type_info::type_info(const type symbol) + : symbol(symbol) + { + } + + std::shared_ptr type_info::is_type() + { + return std::static_pointer_cast(shared_from_this()); + } + + procedure_info::procedure_info(const procedure_type symbol, const std::vector names, + std::shared_ptr scope) + : symbol(symbol), names(names), scope(scope) + { + } + + std::shared_ptr procedure_info::is_procedure() + { + return std::static_pointer_cast(shared_from_this()); + } + + bool procedure_info::is_extern() const + { + return this->scope == nullptr; + } + + constant_info::constant_info(const variant& symbol) + : symbol(symbol) + { + } + + std::shared_ptr constant_info::is_constant() + { + return std::static_pointer_cast(shared_from_this()); + } + + variable_info::variable_info(const type symbol, bool is_extern) + : symbol(symbol), is_extern(is_extern) + { + } + + std::shared_ptr variable_info::is_variable() + { + return std::static_pointer_cast(shared_from_this()); + } + + std::shared_ptr builtin_symbol_table() + { + auto result = std::make_shared(); + + result->enter("Int", std::make_shared(type(std::make_shared("Int")))); + result->enter("Word", std::make_shared(type(std::make_shared("Word")))); + result->enter("Char", std::make_shared(type(std::make_shared("Char")))); + result->enter("Bool", std::make_shared(type(std::make_shared("Bool")))); + result->enter("Pointer", std::make_shared(type(std::make_shared("Pointer")))); + result->enter("Float", std::make_shared(type(std::make_shared("Float")))); + result->enter("String", std::make_shared(type(std::make_shared("String")))); + + return result; + } + + symbol_bag::symbol_bag(forward_table&& unresolved, std::shared_ptr global_table) + : unresolved(unresolved) + { + this->symbols = std::make_shared(global_table); + } + + std::shared_ptr symbol_bag::lookup(const std::string& name) + { + for (auto import_bag : this->imports) + { + if (auto result = import_bag->lookup(name)) + { + return result; + } + } + return this->symbols->lookup(name); + } + + bool symbol_bag::enter(const std::string& name, std::shared_ptr entry) + { + return this->symbols->enter(name, entry); + } + + std::shared_ptr symbol_bag::enter() + { + this->symbols = std::make_shared(this->symbols); + return this->symbols; + } + + void symbol_bag::enter(std::shared_ptr child) + { + this->symbols = child; + } + + std::shared_ptr symbol_bag::leave() + { + std::shared_ptr result = this->symbols; + + this->symbols = result->scope(); + return result; + } + + std::shared_ptr symbol_bag::declared(const std::string& symbol_name) + { + auto unresolved_alias = this->unresolved.find(symbol_name); + + return unresolved_alias == this->unresolved.end() ? std::shared_ptr() : unresolved_alias->second; + } + + std::shared_ptr symbol_bag::resolve(const std::string& symbol_name, type& resolution) + { + auto unresolved_declaration = this->unresolved.at(symbol_name); + + unresolved_declaration->reference = resolution; + return unresolved_declaration; + } + + void symbol_bag::add_import(const symbol_bag& bag) + { + this->imports.push_front(bag.symbols); + } +} diff --git a/gcc/Make-lang.in b/gcc/Make-lang.in new file mode 100644 index 0000000..e25fc6d --- /dev/null +++ b/gcc/Make-lang.in @@ -0,0 +1,174 @@ +# Top level -*- makefile -*- fragment for the Elna frontend. +# Copyright (C) 2025 Free Software Foundation, Inc. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +ELNA_INSTALL_NAME := $(shell echo gelna|sed '$(program_transform_name)') +ELNA_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gelna|sed '$(program_transform_name)') + +elna: elna1$(exeext) + +.PHONY: elna + +# Driver + +ELNA_OBJS = \ + $(GCC_OBJS) \ + elna/elna-spec.o \ + $(END) + +gelna$(exeext): $(ELNA_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a $(LIBDEPS) + +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \ + $(ELNA_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a \ + $(EXTRA_GCC_LIBS) $(LIBS) + +# Create a version of the gelna driver which calls the cross-compiler. +gelna-cross$(exeext): gelna$(exeext) + -rm -f gelna-cross$(exeext) + cp gelna$(exeext) gelna-cross$(exeext) + +# The compiler proper + +elna_OBJS = \ + elna/elna1.o \ + elna/elna-generic.o \ + elna/elna-diagnostic.o \ + elna/elna-tree.o \ + elna/elna-builtins.o \ + elna/ast.o \ + elna/dependency.o \ + elna/driver.o \ + elna/lexer.o \ + elna/parser.o \ + elna/semantic.o \ + elna/symbol.o \ + elna/result.o \ + $(END) + +elna1$(exeext): attribs.o $(elna_OBJS) $(BACKEND) $(LIBDEPS) + +$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \ + attribs.o $(elna_OBJS) $(BACKEND) $(LIBS) $(BACKENDLIBS) + +elna.all.cross: gelna-cross$(exeext) + +elna.start.encap: gelna$(exeext) +elna.rest.encap: + +# No elna-specific selftests. +selftest-elna: + +ELNA_TEXI_FILES = \ + elna/gcc/gelna.texi \ + $(srcdir)/doc/include/fdl.texi \ + $(srcdir)/doc/include/gpl_v3.texi \ + $(srcdir)/doc/include/funding.texi \ + $(srcdir)/doc/include/gcc-common.texi \ + gcc-vers.texi + +elna.install-common: installdirs + -rm -f $(DESTDIR)$(bindir)/$(ELNA_INSTALL_NAME)$(exeext) + $(INSTALL_PROGRAM) gelna$(exeext) $(DESTDIR)$(bindir)/$(ELNA_INSTALL_NAME)$(exeext) + -if test -f elna1$(exeext); then \ + if test -f gelna-cross$(exeext); then \ + :; \ + else \ + rm -f $(DESTDIR)$(bindir)/$(ELNA_TARGET_INSTALL_NAME)$(exeext); \ + ( cd $(DESTDIR)$(bindir) && \ + $(LN) $(ELNA_INSTALL_NAME)$(exeext) $(ELNA_TARGET_INSTALL_NAME)$(exeext) ); \ + fi; \ + fi + +$(build_htmldir)/gelna/index.html: $(ELNA_TEXI_FILES) + $(mkinstalldirs) $(@D) + rm -f $(@D)/* + $(TEXI2HTML) -I $(gcc_docdir)/include -I $(srcdir)/elna -o $(@D) $< + +# Required goals, they still do nothing +elna.install-man: +elna.install-info: +elna.install-pdf: +elna.install-plugin: + +elna.install-html: $(build_htmldir)/gelna + @$(NORMAL_INSTALL) + test -z "$(htmldir)" || $(mkinstalldirs) "$(DESTDIR)$(htmldir)" + @for p in $(build_htmldir)/gelna; do \ + if test -f "$$p" || test -d "$$p"; then d=""; else d="$(srcdir)/"; fi; \ + f=$(html__strip_dir) \ + if test -d "$$d$$p"; then \ + echo " $(mkinstalldirs) '$(DESTDIR)$(htmldir)/$$f'"; \ + $(mkinstalldirs) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \ + echo " $(INSTALL_DATA) '$$d$$p'/* '$(DESTDIR)$(htmldir)/$$f'"; \ + $(INSTALL_DATA) "$$d$$p"/* "$(DESTDIR)$(htmldir)/$$f"; \ + else \ + echo " $(INSTALL_DATA) '$$d$$p' '$(DESTDIR)$(htmldir)/$$f'"; \ + $(INSTALL_DATA) "$$d$$p" "$(DESTDIR)$(htmldir)/$$f"; \ + fi; \ + done + +elna.info: +elna.dvi: +elna.pdf: +elna.html: $(build_htmldir)/gelna/index.html +elna.man: +elna.mostlyclean: +elna.clean: +elna.distclean: +elna.maintainer-clean: + +# make uninstall +elna.uninstall: + -rm -f gelna$(exeext) elna1$(exeext) + -rm -f $(elna_OBJS) + +# Used for handling bootstrap +elna.stage1: stage1-start + -mv elna/*$(objext) stage1/elna +elna.stage2: stage2-start + -mv elna/*$(objext) stage2/elna +elna.stage3: stage3-start + -mv elna/*$(objext) stage3/elna +elna.stage4: stage4-start + -mv elna/*$(objext) stage4/elna +elna.stageprofile: stageprofile-start + -mv elna/*$(objext) stageprofile/elna +elna.stagefeedback: stagefeedback-start + -mv elna/*$(objext) stagefeedback/elna + +ELNA_INCLUDES = -I $(srcdir)/elna/include -I elna/generated +ELNA_CXXFLAGS = -std=c++17 + +elna/%.o: elna/frontend/%.cc elna/generated/parser.hh elna/generated/location.hh + $(COMPILE) $(ELNA_CXXFLAGS) $(ELNA_INCLUDES) $< + $(POSTCOMPILE) + +elna/%.o: elna/generated/%.cc elna/generated/parser.hh elna/generated/location.hh + $(COMPILE) $(ELNA_CXXFLAGS) $(ELNA_INCLUDES) $< + $(POSTCOMPILE) + +elna/%.o: elna/gcc/%.cc elna/generated/parser.hh elna/generated/location.hh + $(COMPILE) $(ELNA_CXXFLAGS) $(ELNA_INCLUDES) $< + $(POSTCOMPILE) + +elna/generated/parser.cc: elna/frontend/parser.yy + mkdir -p $(dir $@) + $(BISON) -d -o $@ $< + +elna/generated/parser.hh elna/generated/location.hh: elna/generated/parser.cc + @touch $@ + +elna/generated/lexer.cc: elna/frontend/lexer.ll + mkdir -p $(dir $@) + $(FLEX) -o $@ $< diff --git a/gcc/README.md b/gcc/README.md new file mode 100644 index 0000000..99d03c3 --- /dev/null +++ b/gcc/README.md @@ -0,0 +1,42 @@ +# Elna programming language + +## Current implementation + +This repository contains a GCC frontend for Elna. After finishing the frontend +I'm planning to rewrite the compiler in Elna itself with its own backend and +a hand-written parser. So GCC gives a way to have a simple bootstrap compiler +and a possbility to compile Elna programs for different platforms. + +## Grammar + +Flex and bison grammar specifications, `lexer.ll` and `parser.yy`, can be found +in the `boot/` directory. + +## Build + +The frontend requires GCC 15.2.0 (not tested with other versions). + +Download the GCC source. Copy the contents of this repository into `gcc/elna` +inside GCC. Finally build GCC enabling the frontend with +`--enable-languages=c,c++,elna`. After the installation the compiler can be +invoked with `$prefix/bin/gelna`. + +There is also a `Rakefile` that downloads, builds and installs GCC into the +`./build/` subdirectory. The `Rakefile` assumes that ruby and rake, as well as +all GCC dependencies are already available in the system. It works under Linux +and Mac OS. In the latter case GCC is patched with the patches used by Homebrew +(official GCC doesn't support Apple silicon targets). Invoke with + +```sh +rake boot +``` + +`gcc` binary is used by default, but a different gcc version can be specified +by passing `CC` and `CXX` environment variables to rake, e.g.: + +```sh +rake CC=gcc-15 CXX=g++-15 boot +``` + +See `rake -T` for more tasks. The GCC source is under `build/tools`. The +installation path is `build/host/install`. diff --git a/gcc/config-lang.in b/gcc/config-lang.in new file mode 100644 index 0000000..0cbbe1f --- /dev/null +++ b/gcc/config-lang.in @@ -0,0 +1,37 @@ +# Top level configure fragment for gcc Elna frontend. +# Copyright (C) 2025 Free Software Foundation, Inc. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Configure looks for the existence of this file to auto-config each language. +# We define several parameters used by configure: +# +# language - name of language as it would appear in $(LANGUAGES) +# boot_language - "yes" if we need to build this language in stage1 +# compilers - value to add to $(COMPILERS) + +language="elna" +gcc_subdir="elna/gcc" + +compilers="elna1\$(exeext)" + +target_libs="" + +gtfiles="\$(srcdir)/elna/gcc/elna1.cc \$(srcdir)/elna/include/elna/gcc/elna1.h" + +lang_requires_boot_languages=c++ + +# Do not build by default +build_by_default="no" diff --git a/gcc/elna-builtins.cc b/gcc/elna-builtins.cc new file mode 100644 index 0000000..cf06df8 --- /dev/null +++ b/gcc/elna-builtins.cc @@ -0,0 +1,274 @@ +/* Builtin definitions. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include + +#include "elna/gcc/elna-builtins.h" +#include "elna/gcc/elna1.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "elna/gcc/elna-tree.h" + +namespace elna::gcc +{ + void init_ttree() + { + elna_int_type_node = long_integer_type_node; + elna_word_type_node = size_type_node; + elna_char_type_node = unsigned_char_type_node; + elna_pointer_type_node = ptr_type_node; + elna_float_type_node = double_type_node; + + elna_bool_type_node = boolean_type_node; + elna_bool_true_node = boolean_true_node; + elna_bool_false_node = boolean_false_node; + + elna_pointer_nil_node = null_pointer_node; + + elna_string_type_node = make_node(RECORD_TYPE); + tree string_ptr_type = build_pointer_type_for_mode(elna_char_type_node, VOIDmode, true); + + elna_string_length_field_node = build_field(UNKNOWN_LOCATION, + elna_string_type_node, "length", build_qualified_type(elna_word_type_node, TYPE_QUAL_CONST)); + elna_string_ptr_field_node = build_field(UNKNOWN_LOCATION, + elna_string_type_node, "ptr", build_qualified_type(string_ptr_type, TYPE_QUAL_CONST)); + + TYPE_FIELDS(elna_string_type_node) = chainon(elna_string_ptr_field_node, elna_string_length_field_node); + layout_type(elna_string_type_node); + } + + static + tree declare_builtin_type(std::shared_ptr symbol_table, const char *name, tree type) + { + tree identifier = get_identifier(name); + tree type_declaration = build_decl(UNKNOWN_LOCATION, TYPE_DECL, identifier, type); + + symbol_table->enter(name, type_declaration); + + return type_declaration; + } + + std::shared_ptr builtin_symbol_table() + { + std::shared_ptr symbol_table = std::make_shared(); + + declare_builtin_type(symbol_table, "Int", elna_int_type_node); + declare_builtin_type(symbol_table, "Word", elna_word_type_node); + declare_builtin_type(symbol_table, "Char", elna_char_type_node); + declare_builtin_type(symbol_table, "Bool", elna_bool_type_node); + declare_builtin_type(symbol_table, "Pointer", elna_pointer_type_node); + declare_builtin_type(symbol_table, "Float", elna_float_type_node); + + tree string_declaration = declare_builtin_type(symbol_table, "String", elna_string_type_node); + TYPE_NAME(elna_string_type_node) = DECL_NAME(string_declaration); + TYPE_STUB_DECL(elna_string_type_node) = string_declaration; + + return symbol_table; + } + + tree build_composite_type(const std::vector& fields, tree composite_type_node, + std::shared_ptr symbols) + { + for (auto& field : fields) + { + tree rewritten_field = get_inner_alias(field.second, symbols); + tree field_declaration = build_field(UNKNOWN_LOCATION, + composite_type_node, field.first, rewritten_field); + TYPE_FIELDS(composite_type_node) = chainon(TYPE_FIELDS(composite_type_node), field_declaration); + } + layout_type(composite_type_node); + return composite_type_node; + } + + tree build_procedure_type(const frontend::procedure_type& procedure, std::shared_ptr symbols) + { + std::vector parameter_types(procedure.parameters.size()); + + for (std::size_t i = 0; i < procedure.parameters.size(); ++i) + { + parameter_types[i] = get_inner_alias(procedure.parameters.at(i), symbols); + } + tree return_type = void_type_node; + + if (!procedure.return_type.proper_type.empty()) + { + return_type = get_inner_alias(procedure.return_type.proper_type, symbols); + } + return build_function_type_array(return_type, procedure.parameters.size(), parameter_types.data()); + } + + tree get_inner_alias(const frontend::type& type, std::shared_ptr symbols) + { + if (auto reference = type.get()) + { + auto looked_up = symbols->lookup(reference->identifier); + gcc_assert(looked_up != NULL_TREE); + + return TREE_TYPE(looked_up); + } + else if (auto reference = type.get()) + { + tree composite_type_node = make_node(RECORD_TYPE); + + build_composite_type(reference->fields, composite_type_node, symbols); + + return composite_type_node; + } + else if (auto reference = type.get()) + { + tree composite_type_node = make_node(UNION_TYPE); + + build_composite_type(reference->fields, composite_type_node, symbols); + + return composite_type_node; + } + else if (auto reference = type.get()) + { + return build_enumeration_type(reference->members); + } + else if (auto reference = type.get()) + { + return build_global_pointer_type(get_inner_alias(reference->base, symbols)); + } + else if (auto reference = type.get()) + { + tree base = get_inner_alias(reference->base, symbols); + + return build_static_array_type(base, reference->size); + } + else if (auto reference = type.get()) + { + auto procedure = build_procedure_type(*reference, symbols); + + return build_global_pointer_type(procedure); + } + else if (auto reference = type.get()) + { + return TREE_TYPE(handle_symbol(reference->name, reference, symbols)); + } + return error_mark_node; + } + + tree handle_symbol(const std::string& symbol_name, std::shared_ptr reference, + std::shared_ptr symbols) + { + tree looked_up = symbols->lookup(symbol_name); + + if (looked_up == NULL_TREE) + { + tree type_tree = get_inner_alias(reference->reference, symbols); + looked_up = build_decl(UNKNOWN_LOCATION, TYPE_DECL, + get_identifier(symbol_name.c_str()), type_tree); + + TREE_PUBLIC(looked_up) = 1; + if (is_unique_type(type_tree)) + { + TYPE_NAME(type_tree) = DECL_NAME(looked_up); + TYPE_STUB_DECL(type_tree) = looked_up; + } + else + { + TYPE_NAME(type_tree) = looked_up; + } + symbols->enter(symbol_name, looked_up); + } + return looked_up; + } + + void declare_procedure(const std::string& name, const frontend::procedure_info& info, + std::shared_ptr symbols) + { + tree declaration_type = gcc::build_procedure_type(info.symbol, symbols); + tree fndecl = build_fn_decl(name.c_str(), declaration_type); + symbols->enter(name, fndecl); + + if (info.symbol.return_type.no_return) + { + TREE_THIS_VOLATILE(fndecl) = 1; + } + tree resdecl = build_decl(UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, TREE_TYPE(declaration_type)); + DECL_CONTEXT(resdecl) = fndecl; + DECL_RESULT(fndecl) = resdecl; + + tree argument_chain = NULL_TREE; + function_args_iterator parameter_type; + function_args_iter_init(¶meter_type, declaration_type); + + std::vector::const_iterator parameter_name = info.names.cbegin(); + + for (frontend::type parameter : info.symbol.parameters) + { + tree declaration_tree = build_decl(UNKNOWN_LOCATION, PARM_DECL, + get_identifier(parameter_name->c_str()), function_args_iter_cond(¶meter_type)); + DECL_CONTEXT(declaration_tree) = fndecl; + DECL_ARG_TYPE(declaration_tree) = function_args_iter_cond(¶meter_type); + + argument_chain = chainon(argument_chain, declaration_tree); + function_args_iter_next(¶meter_type); + ++parameter_name; + } + DECL_ARGUMENTS(fndecl) = argument_chain; + TREE_ADDRESSABLE(fndecl) = 1; + DECL_EXTERNAL(fndecl) = info.is_extern(); + TREE_PUBLIC(fndecl) = info.exported; + } + + tree declare_variable(const std::string& name, const frontend::variable_info& info, + std::shared_ptr symbols) + { + auto variable_type = get_inner_alias(info.symbol, symbols); + tree declaration_tree = build_decl(UNKNOWN_LOCATION, VAR_DECL, get_identifier(name.c_str()), variable_type); + + TREE_ADDRESSABLE(declaration_tree) = 1; + DECL_EXTERNAL(declaration_tree) = info.is_extern; + TREE_PUBLIC(declaration_tree) = info.exported; + + symbols->enter(name, declaration_tree); + + return declaration_tree; + } + + void declare_type(const std::string& name, const frontend::type_info& info, std::shared_ptr symbols) + { + // The top level symbol table has basic (builtin) types in it which are not aliases. + if (auto alias_type = info.symbol.get()) + { + tree type_declaration = handle_symbol(name, alias_type, symbols); + + TREE_PUBLIC(type_declaration) = info.exported; + } + } + + void rewrite_symbol_table(std::shared_ptr info_table, std::shared_ptr symbols) + { + for (auto& [symbol_name, symbol_info] : *info_table) + { + if (auto type_info = symbol_info->is_type()) + { + declare_type(symbol_name, *type_info, symbols); + } + else if (auto variable_info = symbol_info->is_variable()) + { + declare_variable(symbol_name, *variable_info, symbols); + } + else if (auto procedure_info = symbol_info->is_procedure()) + { + declare_procedure(symbol_name, *procedure_info, symbols); + } + } + } +} diff --git a/gcc/elna-diagnostic.cc b/gcc/elna-diagnostic.cc new file mode 100644 index 0000000..162d6cb --- /dev/null +++ b/gcc/elna-diagnostic.cc @@ -0,0 +1,167 @@ +/* Elna frontend specific diagnostic routines. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "elna/gcc/elna-diagnostic.h" +#include "elna/gcc/elna-tree.h" +#include "elna/gcc/elna1.h" + +namespace elna::gcc +{ + linemap_guard::linemap_guard(const char *filename) + { + linemap_add(line_table, LC_ENTER, 0, filename, 1); + } + + linemap_guard::~linemap_guard() + { + linemap_add(line_table, LC_LEAVE, 0, NULL, 0); + } + + location_t get_location(const frontend::position *position) + { + linemap_line_start(line_table, position->line, 0); + + return linemap_position_for_column(line_table, position->column); + } + + std::string print_aggregate_name(tree type, const std::string& kind_name) + { + if (TYPE_IDENTIFIER(type) == NULL_TREE) + { + return kind_name; + } + else + { + return std::string(IDENTIFIER_POINTER(TYPE_IDENTIFIER(type))); + } + } + + std::string print_type(tree type) + { + gcc_assert(TYPE_P(type)); + + tree unqualified_type = get_qualified_type(type, TYPE_UNQUALIFIED); + tree_code code = TREE_CODE(type); + + if (unqualified_type == elna_int_type_node) + { + return "Int"; + } + else if (unqualified_type == elna_word_type_node) + { + return "Word"; + } + else if (unqualified_type == elna_bool_type_node) + { + return "Bool"; + } + else if (unqualified_type == elna_pointer_type_node) + { + return "Pointer"; + } + else if (unqualified_type == elna_float_type_node) + { + return "Float"; + } + else if (unqualified_type == elna_char_type_node) + { + return "Char"; + } + else if (unqualified_type == elna_string_type_node) + { + return "String"; + } + else if (is_void_type(unqualified_type)) // For procedures without a return type. + { + return "()"; + } + else if (POINTER_TYPE_P(unqualified_type)) + { + tree pointer_target_type = TREE_TYPE(type); + + if (TREE_CODE(pointer_target_type) == FUNCTION_TYPE) + { + return print_type(pointer_target_type); + } + else + { + return std::string("^" + print_type(pointer_target_type)); + } + } + else if (code == FUNCTION_TYPE) + { + std::string output = "proc("; + tree parameter_type = TYPE_ARG_TYPES(type); + while (TREE_VALUE(parameter_type) != void_type_node) + { + output += print_type(TREE_VALUE(parameter_type)); + parameter_type = TREE_CHAIN(parameter_type); + if (TREE_VALUE(parameter_type) == void_type_node) + { + break; + } + else + { + output += ", "; + } + } + output += ')'; + tree return_type = TREE_TYPE(type); + + if (!is_void_type(return_type)) + { + output += " -> " + print_type(return_type); + } + return output; + } + else if (code == ARRAY_TYPE) + { + return "array"; + } + else if (code == RECORD_TYPE) + { + return print_aggregate_name(unqualified_type, "record"); + } + else if (code == UNION_TYPE) + { + return print_aggregate_name(unqualified_type, "union"); + } + else if (code == ENUMERAL_TYPE) + { + return print_aggregate_name(unqualified_type, "enumeration"); + } + else + { + return "<>"; + } + gcc_unreachable(); + } + + void report_errors(const std::deque>& errors) + { + for (const auto& error : errors) + { + location_t gcc_location{ UNKNOWN_LOCATION }; + + if (error->position.line != 0 || error->position.column != 0) + { + gcc_location = elna::gcc::get_location(&error->position); + } + error_at(gcc_location, error->what().c_str()); + } + } +} diff --git a/gcc/elna-generic.cc b/gcc/elna-generic.cc new file mode 100644 index 0000000..b37b111 --- /dev/null +++ b/gcc/elna-generic.cc @@ -0,0 +1,1277 @@ +/* Visitor generating a GENERIC tree. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include + +#include "elna/gcc/elna-generic.h" +#include "elna/gcc/elna-diagnostic.h" +#include "elna/gcc/elna1.h" +#include "elna/gcc/elna-builtins.h" + +#include "ggc.h" +#include "function.h" +#include "cgraph.h" +#include "gimplify.h" +#include "stringpool.h" +#include "diagnostic.h" +#include "realmpfr.h" +#include "varasm.h" +#include "fold-const.h" +#include "langhooks.h" + +namespace elna::gcc +{ + generic_visitor::generic_visitor(std::shared_ptr symbol_table, elna::frontend::symbol_bag bag) + : bag(bag), symbols(symbol_table) + { + } + + void generic_visitor::build_procedure_call(location_t call_location, + tree procedure_address, const std::vector& arguments) + { + vec *argument_trees = nullptr; + tree symbol_type = TREE_TYPE(TREE_TYPE(procedure_address)); + + tree current_parameter = TYPE_ARG_TYPES(symbol_type); + + vec_alloc(argument_trees, arguments.size()); + for (frontend::expression *const argument : arguments) + { + location_t argument_location = get_location(&argument->position()); + if (VOID_TYPE_P(TREE_VALUE(current_parameter))) + { + error_at(argument_location, "Too many arguments, expected %i, got %lu", + list_length(TYPE_ARG_TYPES(symbol_type)) - 1, arguments.size()); + this->current_expression = error_mark_node; + break; + } + argument->accept(this); + this->current_expression = prepare_rvalue(this->current_expression); + if (!is_assignable_from(TREE_VALUE(current_parameter), this->current_expression)) + { + error_at(argument_location, + "Cannot assign value of type '%s' to variable of type '%s'", + print_type(TREE_TYPE(this->current_expression)).c_str(), + print_type(TREE_VALUE(current_parameter)).c_str()); + this->current_expression = error_mark_node; + } + current_parameter = TREE_CHAIN(current_parameter); + argument_trees->quick_push(this->current_expression); + } + tree stmt = fold_build_call_array_loc(call_location, TREE_TYPE(symbol_type), + procedure_address, vec_safe_length(argument_trees), vec_safe_address(argument_trees)); + + if (!VOID_TYPE_P(TREE_VALUE(current_parameter))) + { + error_at(call_location, "Too few arguments, expected %i, got %lu", + list_length(TYPE_ARG_TYPES(symbol_type)) - 1, arguments.size()); + this->current_expression = error_mark_node; + } + else + { + this->current_expression = stmt; + } + } + + void generic_visitor::build_record_call(location_t call_location, + tree symbol, const std::vector& arguments) + { + vec *tree_arguments = nullptr; + tree record_fields = TYPE_FIELDS(symbol); + for (frontend::expression *const argument : arguments) + { + location_t argument_location = get_location(&argument->position()); + + if (is_void_type(record_fields)) + { + error_at(argument_location, "Too many arguments, expected %i, got %lu", + list_length(TYPE_FIELDS(symbol)), arguments.size()); + this->current_expression = error_mark_node; + break; + } + argument->accept(this); + tree unqualified_field = get_qualified_type(TREE_TYPE(record_fields), TYPE_UNQUALIFIED); + if (!is_assignable_from(unqualified_field, this->current_expression)) + { + error_at(argument_location, + "Cannot assign value of type '%s' to variable of type '%s'", + print_type(TREE_TYPE(this->current_expression)).c_str(), + print_type(TREE_TYPE(record_fields)).c_str()); + this->current_expression = error_mark_node; + } + CONSTRUCTOR_APPEND_ELT(tree_arguments, record_fields, this->current_expression); + record_fields = TREE_CHAIN(record_fields); + } + if (!is_void_type(record_fields)) + { + error_at(call_location, "Too few arguments, expected %i, got %lu", + list_length(TYPE_FIELDS(symbol)), arguments.size()); + this->current_expression = error_mark_node; + } + else + { + this->current_expression = build_constructor(symbol, tree_arguments); + } + } + + void generic_visitor::build_assert_builtin(location_t call_location, + const std::vector& arguments) + { + if (arguments.size() != 1) + { + error_at(call_location, "assert expects exactly one boolean argument, got %lu", arguments.size()); + this->current_expression = error_mark_node; + } + else + { + arguments.at(0)->accept(this); + tree argument_type = TREE_TYPE(this->current_expression); + + if (argument_type != elna_bool_type_node) + { + error_at(call_location, "assert expects exactly one boolean argument, got %s", + print_type(argument_type).c_str()); + this->current_expression = error_mark_node; + } + tree constant_expression = extract_constant(this->current_expression); + if (constant_expression == boolean_false_node) + { + this->current_expression = call_built_in(call_location, "__builtin_unreachable", void_type_node); + } + else if (constant_expression != boolean_true_node) + { + tree assert_expression = call_built_in(call_location, "__builtin_trap", void_type_node); + this->current_expression = build3(COND_EXPR, void_type_node, this->current_expression, + NULL_TREE, assert_expression); + } + else + { + this->current_expression = NULL_TREE; + } + } + } + + bool generic_visitor::build_builtin_procedures(frontend::procedure_call *call) + { + location_t call_location = get_location(&call->position()); + + if (frontend::variable_expression *named_call = call->callable().is_variable()) + { + if (named_call->name == "assert") + { + build_assert_builtin(call_location, call->arguments); + return true; + } + } + return false; + } + + void generic_visitor::visit(frontend::procedure_call *call) + { + if (build_builtin_procedures(call)) + { + return; + } + location_t call_location = get_location(&call->position()); + call->callable().accept(this); + + tree expression_type = TYPE_P(this->current_expression) + ? this->current_expression + : TREE_TYPE(this->current_expression); + + if (TREE_CODE(expression_type) == RECORD_TYPE) + { + build_record_call(call_location, expression_type, call->arguments); + } + else if (TREE_CODE(expression_type) == FUNCTION_TYPE) + { + this->current_expression = build1(ADDR_EXPR, + build_global_pointer_type(expression_type), this->current_expression); + build_procedure_call(call_location, this->current_expression, call->arguments); + } + else if (POINTER_TYPE_P(expression_type) && TREE_CODE(TREE_TYPE(expression_type)) == FUNCTION_TYPE) + { + build_procedure_call(call_location, this->current_expression, call->arguments); + } + else + { + error_at(call_location, "'%s' cannot be called, it is neither a procedure nor record", + print_type(expression_type).c_str()); + this->current_expression = error_mark_node; + } + } + + void generic_visitor::visit(frontend::cast_expression *expression) + { + tree cast_target = get_inner_alias(expression->expression_type, this->symbols->scope()); + + expression->value().accept(this); + tree cast_source = TREE_TYPE(this->current_expression); + + if (is_castable_type(cast_target) && (is_castable_type(cast_source))) + { + this->current_expression = build1_loc(get_location(&expression->position()), CONVERT_EXPR, + cast_target, this->current_expression); + } + else + { + error_at(get_location(&expression->position()), "Type '%s' cannot be converted to '%s'", + print_type(cast_source).c_str(), print_type(cast_target).c_str()); + this->current_expression = error_mark_node; + } + } + + void generic_visitor::visit(frontend::program *program) + { + visit(static_cast(program)); + + tree declaration_type = build_function_type_list(elna_int_type_node, + elna_int_type_node, + build_global_pointer_type(build_global_pointer_type(elna_char_type_node)), + NULL_TREE); + tree fndecl = build_fn_decl("main", declaration_type); + + tree resdecl = build_decl(UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, integer_type_node); + DECL_CONTEXT(resdecl) = fndecl; + DECL_RESULT(fndecl) = resdecl; + + push_struct_function(fndecl, false); + DECL_STRUCT_FUNCTION(fndecl)->language = ggc_cleared_alloc(); + + enter_scope(); + + tree parameter_type = TYPE_ARG_TYPES(declaration_type); + for (const char *argument_name : std::array{ "count", "parameters" }) + { + tree declaration_tree = build_decl(UNKNOWN_LOCATION, PARM_DECL, + get_identifier(argument_name), TREE_VALUE(parameter_type)); + DECL_CONTEXT(declaration_tree) = fndecl; + DECL_ARG_TYPE(declaration_tree) = TREE_VALUE(parameter_type); + + this->symbols->enter(argument_name, declaration_tree); + DECL_ARGUMENTS(fndecl) = chainon(DECL_ARGUMENTS(fndecl), declaration_tree); + parameter_type = TREE_CHAIN(parameter_type); + } + visit_statements(program->body); + tree set_result = build2(INIT_EXPR, void_type_node, DECL_RESULT(fndecl), + build_int_cst_type(integer_type_node, 0)); + tree return_stmt = build1(RETURN_EXPR, void_type_node, set_result); + append_statement(return_stmt); + tree mapping = leave_scope(); + + BLOCK_SUPERCONTEXT(BIND_EXPR_BLOCK(mapping)) = fndecl; + DECL_INITIAL(fndecl) = BIND_EXPR_BLOCK(mapping); + DECL_SAVED_TREE(fndecl) = mapping; + + DECL_EXTERNAL(fndecl) = 0; + DECL_PRESERVE_P(fndecl) = 1; + + pop_cfun(); + gimplify_function_tree(fndecl); + cgraph_node::finalize_function(fndecl, true); + } + + void generic_visitor::visit(frontend::unit *unit) + { + for (frontend::import_declaration *const declaration : unit->imports) + { + declaration->accept(this); + } + for (frontend::constant_declaration *const constant : unit->constants) + { + constant->accept(this); + } + for (frontend::variable_declaration *const variable : unit->variables) + { + variable->accept(this); + } + for (frontend::procedure_declaration *const procedure : unit->procedures) + { + procedure->accept(this); + } + } + + void generic_visitor::visit(frontend::procedure_declaration *definition) + { + tree fndecl = this->symbols->lookup(definition->identifier.name); + + if (!definition->body.has_value()) + { + return; + } + push_struct_function(fndecl, false); + DECL_STRUCT_FUNCTION(fndecl)->language = ggc_cleared_alloc(); + + enter_scope(); + this->bag.enter(this->bag.lookup(definition->identifier.name)->is_procedure()->scope); + + tree argument_chain = DECL_ARGUMENTS(fndecl); + for (; argument_chain != NULL_TREE; argument_chain = TREE_CHAIN(argument_chain)) + { + this->symbols->enter(IDENTIFIER_POINTER(DECL_NAME(argument_chain)), argument_chain); + } + for (frontend::constant_declaration *const constant : definition->body.value().constants()) + { + constant->accept(this); + } + for (frontend::variable_declaration *const variable : definition->body.value().variables()) + { + variable->accept(this); + } + visit_statements(definition->body.value().body()); + + tree mapping = leave_scope(); + this->bag.leave(); + + BLOCK_SUPERCONTEXT(BIND_EXPR_BLOCK(mapping)) = fndecl; + DECL_INITIAL(fndecl) = BIND_EXPR_BLOCK(mapping); + DECL_SAVED_TREE(fndecl) = mapping; + + DECL_PRESERVE_P(fndecl) = 1; + + pop_cfun(); + gimplify_function_tree(fndecl); + cgraph_node::finalize_function(fndecl, true); + } + + void generic_visitor::enter_scope() + { + this->symbols = std::make_shared(this->symbols); + + // Chain the binding levels. + struct binding_level *new_level = ggc_cleared_alloc(); + new_level->level_chain = f_binding_level; + new_level->statement_list = alloc_stmt_list(); + f_binding_level = new_level; + } + + tree generic_visitor::leave_scope() + { + // Variables are only defined in the top function scope. + tree variables = f_binding_level->level_chain == nullptr ? f_names : NULL_TREE; + tree new_block = build_block(variables, f_binding_level->blocks, NULL_TREE, NULL_TREE); + + for (tree it = f_binding_level->blocks; it != NULL_TREE; it = BLOCK_CHAIN(it)) + { + BLOCK_SUPERCONTEXT(it) = new_block; + } + tree bind_expr = build3(BIND_EXPR, void_type_node, variables, chain_defer(), new_block); + this->symbols = this->symbols->scope(); + + f_binding_level = f_binding_level->level_chain; + + if (f_binding_level != nullptr) + { + f_binding_level->blocks = chainon(f_binding_level->blocks, new_block); + } + return bind_expr; + } + + void generic_visitor::visit(frontend::literal *literal) + { + this->current_expression = build_int_cst(elna_int_type_node, literal->value); + } + + void generic_visitor::visit(frontend::literal *literal) + { + this->current_expression = build_int_cstu(elna_word_type_node, literal->value); + } + + void generic_visitor::visit(frontend::literal *literal) + { + REAL_VALUE_TYPE real_value1; + + mpfr_t number; + mpfr_init2(number, SIGNIFICAND_BITS); + mpfr_set_d(number, literal->value, MPFR_RNDN); + + real_from_mpfr(&real_value1, number, double_type_node, MPFR_RNDN); + + this->current_expression = build_real(double_type_node, real_value1); + + mpfr_clear(number); + } + + void generic_visitor::visit(frontend::literal *boolean) + { + this->current_expression = boolean->value ? boolean_true_node : boolean_false_node; + } + + void generic_visitor::visit(frontend::literal *character) + { + this->current_expression = build_int_cstu(elna_char_type_node, character->value); + } + + void generic_visitor::visit(frontend::literal *) + { + this->current_expression = elna_pointer_nil_node; + } + + void generic_visitor::visit(frontend::literal *string) + { + tree index_constant = build_int_cstu(elna_word_type_node, string->value.size()); + tree string_type = build_array_type(elna_char_type_node, build_index_type(index_constant)); + + tree string_literal = build_string(string->value.size(), string->value.c_str()); + TREE_TYPE(string_literal) = string_type; + TREE_CONSTANT(string_literal) = 1; + TREE_READONLY(string_literal) = 1; + TREE_STATIC(string_literal) = 1; + + string_type = TREE_TYPE(elna_string_ptr_field_node); + string_literal = build4(ARRAY_REF, elna_char_type_node, + string_literal, integer_zero_node, NULL_TREE, NULL_TREE); + string_literal = build1(ADDR_EXPR, string_type, string_literal); + + vec *elms = nullptr; + CONSTRUCTOR_APPEND_ELT(elms, elna_string_ptr_field_node, string_literal); + CONSTRUCTOR_APPEND_ELT(elms, elna_string_length_field_node, index_constant); + + this->current_expression = build_constructor(elna_string_type_node, elms); + } + + tree generic_visitor::build_arithmetic_operation(frontend::binary_expression *expression, + tree_code operator_code, tree left, tree right) + { + return build_binary_operation(is_numeric_type(TREE_TYPE(left)), + expression, operator_code, left, right, TREE_TYPE(left)); + } + + tree generic_visitor::build_comparison_operation(frontend::binary_expression *expression, + tree_code operator_code, tree left, tree right) + { + return build_binary_operation(is_numeric_type(TREE_TYPE(left)) || POINTER_TYPE_P(TREE_TYPE(left)), + expression, operator_code, left, right, elna_bool_type_node); + } + + tree generic_visitor::build_bit_logic_operation(frontend::binary_expression *expression, tree left, tree right) + { + location_t expression_location = get_location(&expression->position()); + tree left_type = TREE_TYPE(left); + tree right_type = TREE_TYPE(right); + tree_code logical_code, bit_code; + + if (expression->operation() == frontend::binary_operator::conjunction) + { + bit_code = BIT_AND_EXPR; + logical_code = TRUTH_ANDIF_EXPR; + } + else if (expression->operation() == frontend::binary_operator::disjunction) + { + bit_code = BIT_IOR_EXPR; + logical_code = TRUTH_ORIF_EXPR; + } + else if (expression->operation() == frontend::binary_operator::exclusive_disjunction) + { + bit_code = BIT_XOR_EXPR; + logical_code = TRUTH_XOR_EXPR; + } + else + { + gcc_unreachable(); + } + if (left_type == elna_bool_type_node) + { + return build2_loc(expression_location, logical_code, elna_bool_type_node, left, right); + } + else if (is_integral_type(left_type)) + { + return build2_loc(expression_location, bit_code, left_type, left, right); + } + else + { + error_at(expression_location, "Invalid operands of type '%s' and '%s' for operator %s", + print_type(left_type).c_str(), print_type(right_type).c_str(), + elna::frontend::print_binary_operator(expression->operation())); + return error_mark_node; + } + } + + tree generic_visitor::build_equality_operation(frontend::binary_expression *expression, tree left, tree right) + { + location_t expression_location = get_location(&expression->position()); + tree_code equality_code, combination_code; + + if (expression->operation() == frontend::binary_operator::equals) + { + equality_code = EQ_EXPR; + combination_code = TRUTH_ANDIF_EXPR; + } + else if (expression->operation() == frontend::binary_operator::not_equals) + { + equality_code = NE_EXPR; + combination_code = TRUTH_ORIF_EXPR; + } + else + { + gcc_unreachable(); + } + if (TREE_TYPE(left) == elna_string_type_node) + { + tree lhs_length = build3(COMPONENT_REF, TREE_TYPE(elna_string_length_field_node), + left, elna_string_length_field_node, NULL_TREE); + tree lhs_ptr = build3(COMPONENT_REF, TREE_TYPE(elna_string_ptr_field_node), + left, elna_string_ptr_field_node, NULL_TREE); + + tree rhs_length = build3(COMPONENT_REF, TREE_TYPE(elna_string_length_field_node), + right, elna_string_length_field_node, NULL_TREE); + tree rhs_ptr = build3(COMPONENT_REF, TREE_TYPE(elna_string_ptr_field_node), + right, elna_string_ptr_field_node, NULL_TREE); + + tree length_equality = build2(equality_code, elna_bool_type_node, lhs_length, rhs_length); + tree memcmp_call = call_built_in(UNKNOWN_LOCATION, "__builtin_memcmp", integer_type_node, + lhs_ptr, rhs_ptr, lhs_length); + tree equals_zero = build2(equality_code, elna_bool_type_node, memcmp_call, integer_zero_node); + + return build2(combination_code, elna_bool_type_node, length_equality, equals_zero); + } + else + { + return build2_loc(expression_location, equality_code, elna_bool_type_node, left, right); + } + } + + void generic_visitor::visit(frontend::binary_expression *expression) + { + expression->lhs().accept(this); + tree left = this->current_expression; + tree left_type = get_qualified_type(TREE_TYPE(left), TYPE_UNQUALIFIED); + + expression->rhs().accept(this); + tree right = this->current_expression; + tree right_type = get_qualified_type(TREE_TYPE(right), TYPE_UNQUALIFIED); + + location_t expression_location = get_location(&expression->position()); + + if ((POINTER_TYPE_P(left_type) || POINTER_TYPE_P(right_type)) + && (expression->operation() == frontend::binary_operator::sum + || expression->operation() == frontend::binary_operator::subtraction)) + { + this->current_expression = do_pointer_arithmetic(expression->operation(), + left, right, expression_location); + if (this->current_expression == error_mark_node) + { + error_at(expression_location, + "invalid operation %s on a pointer and an integral type", + frontend::print_binary_operator(expression->operation())); + } + else if (TREE_TYPE(this->current_expression) == ssizetype) + { + this->current_expression = fold_convert(elna_int_type_node, this->current_expression); + } + return; + } + if (left_type != right_type + && !are_compatible_pointers(left_type, right) + && !are_compatible_pointers(right_type, left) + && !(is_integral_type(left_type) && right_type == elna_word_type_node)) + { + error_at(expression_location, + "invalid operands of type '%s' and '%s' for operator %s", + print_type(left_type).c_str(), print_type(right_type).c_str(), + frontend::print_binary_operator(expression->operation())); + this->current_expression = error_mark_node; + return; + } + switch (expression->operation()) + { + case frontend::binary_operator::sum: + this->current_expression = build_arithmetic_operation(expression, PLUS_EXPR, left, right); + break; + case frontend::binary_operator::subtraction: + this->current_expression = build_arithmetic_operation(expression, MINUS_EXPR, left, right); + break; + case frontend::binary_operator::division: + this->current_expression = build_arithmetic_operation(expression, TRUNC_DIV_EXPR, left, right); + break; + case frontend::binary_operator::remainder: + this->current_expression = build_arithmetic_operation(expression, TRUNC_MOD_EXPR, left, right); + break; + case frontend::binary_operator::multiplication: + this->current_expression = build_arithmetic_operation(expression, MULT_EXPR, left, right); + break; + case frontend::binary_operator::less: + this->current_expression = build_comparison_operation(expression, LT_EXPR, left, right); + break; + case frontend::binary_operator::greater: + this->current_expression = build_comparison_operation(expression, GT_EXPR, left, right); + break; + case frontend::binary_operator::less_equal: + this->current_expression = build_comparison_operation(expression, LE_EXPR, left, right); + break; + case frontend::binary_operator::greater_equal: + this->current_expression = build_comparison_operation(expression, GE_EXPR, left, right); + break; + case frontend::binary_operator::conjunction: + this->current_expression = build_bit_logic_operation(expression, left, right); + break; + case frontend::binary_operator::disjunction: + this->current_expression = build_bit_logic_operation(expression, left, right); + break; + case frontend::binary_operator::exclusive_disjunction: + this->current_expression = build_bit_logic_operation(expression, left, right); + break; + case frontend::binary_operator::equals: + this->current_expression = build_equality_operation(expression, left, right); + break; + case frontend::binary_operator::not_equals: + this->current_expression = build_equality_operation(expression, left, right); + break; + case frontend::binary_operator::shift_left: + this->current_expression = build_binary_operation( + is_numeric_type(left_type) && right_type == elna_word_type_node, + expression, LSHIFT_EXPR, left, right, left_type); + break; + case frontend::binary_operator::shift_right: + this->current_expression = build_binary_operation( + is_numeric_type(left_type) && right_type == elna_word_type_node, + expression, RSHIFT_EXPR, left, right, left_type); + break; + } + } + + void generic_visitor::visit(frontend::unary_expression *expression) + { + expression->operand().accept(this); + location_t location = get_location(&expression->position()); + + switch (expression->operation()) + { + case frontend::unary_operator::reference: + this->current_expression = prepare_rvalue(this->current_expression); + TREE_ADDRESSABLE(this->current_expression) = 1; + this->current_expression = build_fold_addr_expr_with_type_loc(location, + this->current_expression, + build_global_pointer_type(TREE_TYPE(this->current_expression))); + TREE_NO_TRAMPOLINE(this->current_expression) = 1; + break; + case frontend::unary_operator::negation: + if (TREE_TYPE(this->current_expression) == elna_bool_type_node) + { + this->current_expression = build1_loc(location, TRUTH_NOT_EXPR, + boolean_type_node, this->current_expression); + } + else if (is_integral_type(TREE_TYPE(this->current_expression))) + { + this->current_expression = build1_loc(location, BIT_NOT_EXPR, + TREE_TYPE(this->current_expression), this->current_expression); + } + else + { + error_at(location, "type '%s' cannot be negated", + print_type(TREE_TYPE(this->current_expression)).c_str()); + this->current_expression = error_mark_node; + } + break; + case frontend::unary_operator::minus: + if (is_integral_type(TREE_TYPE(this->current_expression))) + { + this->current_expression = fold_build1(NEGATE_EXPR, TREE_TYPE(this->current_expression), + this->current_expression); + } + else + { + error_at(location, "type '%s' cannot be negated", + print_type(TREE_TYPE(this->current_expression)).c_str()); + this->current_expression = error_mark_node; + } + } + } + + void generic_visitor::visit(frontend::constant_declaration *definition) + { + location_t definition_location = get_location(&definition->position()); + definition->body().accept(this); + + if (assert_constant(definition_location)) + { + this->current_expression = fold_init(this->current_expression); + } + else + { + this->current_expression = NULL_TREE; + return; + } + tree definition_tree = build_decl(definition_location, CONST_DECL, + get_identifier(definition->identifier.name.c_str()), TREE_TYPE(this->current_expression)); + auto result = this->symbols->enter(definition->identifier.name, definition_tree); + + if (result) + { + DECL_INITIAL(definition_tree) = this->current_expression; + TREE_CONSTANT(definition_tree) = 1; + TREE_READONLY(definition_tree) = 1; + TREE_PUBLIC(definition_tree) = definition->identifier.exported; + + if (!lang_hooks.decls.global_bindings_p()) + { + auto declaration_statement = build1_loc(definition_location, DECL_EXPR, + void_type_node, definition_tree); + append_statement(declaration_statement); + } + } + else + { + error_at(definition_location, "Variable '%s' already declared in this scope", + definition->identifier.name.c_str()); + } + this->current_expression = NULL_TREE; + } + + void generic_visitor::visit(frontend::variable_declaration *declaration) + { + for (const auto& variable_identifier : declaration->identifiers) + { + location_t declaration_location = get_location(&declaration->position()); + tree declaration_tree = this->symbols->lookup(variable_identifier.name); + + if (declaration_tree == NULL_TREE) + { + auto variable_symbol = this->bag.lookup(variable_identifier.name)->is_variable(); + + declaration_tree = declare_variable(variable_identifier.name, *variable_symbol, this->symbols); + } + // Set initializer if given. + if (declaration->body != nullptr) + { + declaration->body->accept(this); + if (is_assignable_from(TREE_TYPE(declaration_tree), this->current_expression)) + { + DECL_INITIAL(declaration_tree) = this->current_expression; + } + else + { + error_at(declaration_location, "Cannot initialize variable of type '%s' with a value of type '%s'", + print_type(TREE_TYPE(declaration_tree)).c_str(), + print_type(TREE_TYPE(this->current_expression)).c_str()); + } + } + else if (!declaration->is_extern && POINTER_TYPE_P(TREE_TYPE(declaration_tree))) + { + DECL_INITIAL(declaration_tree) = elna_pointer_nil_node; + } + this->current_expression = NULL_TREE; + + if (lang_hooks.decls.global_bindings_p()) + { + TREE_STATIC(declaration_tree) = !variable_identifier.exported && !declaration->is_extern; + varpool_node::get_create(declaration_tree); + varpool_node::finalize_decl(declaration_tree); + } + else + { + DECL_CONTEXT(declaration_tree) = current_function_decl; + f_names = chainon(f_names, declaration_tree); + + auto declaration_statement = build1_loc(declaration_location, DECL_EXPR, + void_type_node, declaration_tree); + append_statement(declaration_statement); + } + } + } + + void generic_visitor::visit(frontend::variable_expression *expression) + { + auto symbol = this->symbols->lookup(expression->name); + + if (symbol == NULL_TREE) + { + error_at(get_location(&expression->position()), "Symbol '%s' not declared in the current scope", + expression->name.c_str()); + this->current_expression = error_mark_node; + } + else + { + this->current_expression = symbol; + } + } + + void generic_visitor::visit(frontend::array_access_expression *expression) + { + expression->base().accept(this); + tree designator = this->current_expression; + location_t location = get_location(&expression->position()); + + expression->index().accept(this); + if (!is_integral_type(TREE_TYPE(this->current_expression))) + { + error_at(location, "Type '%s' cannot be used as index", + print_type(TREE_TYPE(this->current_expression)).c_str()); + this->current_expression = error_mark_node; + return; + } + tree offset = fold_convert(elna_word_type_node, this->current_expression); + + if (TREE_CODE(TREE_TYPE(designator)) == ARRAY_TYPE) + { + tree element_type = TREE_TYPE(TREE_TYPE(designator)); + + this->current_expression = build4_loc(location, + ARRAY_REF, element_type, designator, offset, size_one_node, NULL_TREE); + } + else if (TREE_TYPE(designator) == elna_string_type_node) + { + offset = build2(MINUS_EXPR, elna_word_type_node, offset, size_one_node); + tree string_ptr = build3_loc(location, COMPONENT_REF, TREE_TYPE(elna_string_ptr_field_node), + designator, elna_string_ptr_field_node, NULL_TREE); + + tree target_pointer = do_pointer_arithmetic(frontend::binary_operator::sum, string_ptr, offset, location); + + this->current_expression = build1_loc(location, INDIRECT_REF, + elna_char_type_node, target_pointer); + } + else + { + error_at(location, "Indexing is not allowed on type '%s'", + print_type(TREE_TYPE(designator)).c_str()); + this->current_expression = error_mark_node; + } + } + + bool generic_visitor::expect_trait_type_only(frontend::traits_expression *trait) + { + if (trait->parameters.size() != 1) + { + error_at(get_location(&trait->position()), "Trait '%s' expects 1 argument, got %lu", + trait->name.c_str(), trait->parameters.size()); + this->current_expression = error_mark_node; + return false; + } + this->current_expression = get_inner_alias(trait->types.front(), this->symbols); + + return this->current_expression != error_mark_node; + } + + bool generic_visitor::expect_trait_for_integral_type(frontend::traits_expression *trait) + { + if (!expect_trait_type_only(trait)) + { + return false; + } + else if (!is_integral_type(this->current_expression) && TREE_CODE(this->current_expression) != ENUMERAL_TYPE) + { + error_at(get_location(&trait->position()), "Type '%s' does not support trait '%s'", + print_type(this->current_expression).c_str(), trait->name.c_str()); + this->current_expression = error_mark_node; + return false; + } + return true; + } + + void generic_visitor::visit(frontend::traits_expression *trait) + { + location_t trait_location = get_location(&trait->position()); + + if (trait->name == "size") + { + if (expect_trait_type_only(trait)) + { + this->current_expression = build1_loc(trait_location, CONVERT_EXPR, elna_word_type_node, + size_in_bytes(this->current_expression)); + } + } + else if (trait->name == "alignment") + { + if (expect_trait_type_only(trait)) + { + this->current_expression = build_int_cstu(elna_word_type_node, + TYPE_ALIGN_UNIT(this->current_expression)); + } + } + else if (trait->name == "min") + { + if (expect_trait_for_integral_type(trait)) + { + this->current_expression = TYPE_MIN_VALUE(this->current_expression); + } + } + else if (trait->name == "max") + { + if (expect_trait_for_integral_type(trait)) + { + this->current_expression = TYPE_MAX_VALUE(this->current_expression); + } + } + else if (trait->name == "offset") + { + if (trait->parameters.size() != 2) + { + error_at(trait_location, "Trait '%s' expects 2 arguments, got %lu", + trait->name.c_str(), trait->parameters.size()); + this->current_expression = error_mark_node; + return; + } + this->current_expression = get_inner_alias(trait->types.front(), this->symbols); + auto field_type = trait->parameters.at(1)->is_named(); + + if (field_type == nullptr) + { + error_at(trait_location, + "The second argument to the offset trait is expected to be a field name," + "got a type expression"); + this->current_expression = error_mark_node; + return; + } + tree field_declaration = find_field_by_name(trait_location, this->current_expression, field_type->name); + + if (field_declaration != error_mark_node) + { + this->current_expression = build1(CONVERT_EXPR, elna_word_type_node, + byte_position(field_declaration)); + } + else + { + this->current_expression = error_mark_node; + } + } + else + { + error_at(get_location(&trait->position()), "Trait '%s' is unknown", trait->name.c_str()); + this->current_expression = error_mark_node; + } + } + + void generic_visitor::visit(frontend::field_access_expression *expression) + { + expression->base().accept(this); + location_t expression_location = get_location(&expression->position()); + tree aggregate_type = TREE_TYPE(this->current_expression); + + if (TREE_CODE(aggregate_type) == ARRAY_TYPE && expression->field() == "length") + { + this->current_expression = convert(build_qualified_type(elna_word_type_node, TYPE_QUAL_CONST), + TYPE_MAX_VALUE(TYPE_DOMAIN(aggregate_type))); + } + else if (TREE_CODE(aggregate_type) == ARRAY_TYPE && expression->field() == "ptr") + { + tree ptr_type = build_global_pointer_type(TREE_TYPE(aggregate_type)); + this->current_expression = build1(ADDR_EXPR, + build_qualified_type(ptr_type, TYPE_QUAL_CONST), this->current_expression); + } + else if (TREE_CODE(aggregate_type) == ENUMERAL_TYPE) + { + tree iterator{ NULL_TREE }; + + for (iterator = TYPE_VALUES(aggregate_type); iterator != NULL_TREE; iterator = TREE_CHAIN(iterator)) + { + if (IDENTIFIER_POINTER(TREE_PURPOSE(iterator)) == expression->field()) + { + this->current_expression = TREE_VALUE(iterator); + return; + } + } + this->current_expression = error_mark_node; + error_at(expression_location, "Unknown enumeration member '%s'", expression->field().c_str()); + } + else + { + tree field_declaration = find_field_by_name(expression_location, + TREE_TYPE(this->current_expression), expression->field()); + + if (field_declaration != error_mark_node) + { + this->current_expression = build3_loc(expression_location, COMPONENT_REF, + TREE_TYPE(field_declaration), this->current_expression, + field_declaration, NULL_TREE); + } + } + } + + void generic_visitor::visit(frontend::dereference_expression *expression) + { + expression->base().accept(this); + location_t expression_location = get_location(&expression->position()); + tree expression_type = TREE_TYPE(this->current_expression); + + if (POINTER_TYPE_P(expression_type)) + { + this->current_expression = build1_loc(expression_location, INDIRECT_REF, + TREE_TYPE(expression_type), this->current_expression); + } + else + { + error_at(expression_location, "Type '%s' cannot be dereferenced, it is not a pointer", + print_type(expression_type).c_str()); + this->current_expression = error_mark_node; + } + } + + void generic_visitor::visit(frontend::assign_statement *statement) + { + statement->lvalue().accept(this); + + tree lvalue = this->current_expression; + location_t statement_location = get_location(&statement->position()); + + statement->rvalue().accept(this); + tree rvalue = prepare_rvalue(this->current_expression); + + if (TREE_CODE(lvalue) == CONST_DECL) + { + error_at(statement_location, "Cannot modify constant '%s'", + statement->lvalue().is_variable()->name.c_str()); + } + else if (TYPE_READONLY(TREE_TYPE(lvalue))) + { + error_at(statement_location, "Cannot modify a constant expression of type '%s'", + print_type(TREE_TYPE(lvalue)).c_str()); + } + else if (is_assignable_from(TREE_TYPE(lvalue), rvalue)) + { + tree assignment = build2_loc(statement_location, MODIFY_EXPR, void_type_node, lvalue, rvalue); + + append_statement(assignment); + } + else + { + error_at(statement_location, "Cannot assign value of type '%s' to variable of type '%s'", + print_type(TREE_TYPE(rvalue)).c_str(), + print_type(TREE_TYPE(lvalue)).c_str()); + } + this->current_expression = NULL_TREE; + } + + void generic_visitor::visit(frontend::if_statement *statement) + { + tree endif_label_decl = create_artificial_label(UNKNOWN_LOCATION); + tree goto_endif = build1(GOTO_EXPR, void_type_node, endif_label_decl); + + make_if_branch(statement->body(), goto_endif); + + for (const auto branch : statement->branches) + { + make_if_branch(*branch, goto_endif); + } + if (statement->alternative != nullptr) + { + enter_scope(); + visit_statements(*statement->alternative); + tree mapping = leave_scope(); + append_statement(mapping); + } + tree endif_label_expr = build1(LABEL_EXPR, void_type_node, endif_label_decl); + append_statement(endif_label_expr); + this->current_expression = NULL_TREE; + } + + void generic_visitor::make_if_branch(frontend::conditional_statements& branch, tree goto_endif) + { + branch.prerequisite().accept(this); + + if (TREE_TYPE(this->current_expression) != elna_bool_type_node) + { + error_at(get_location(&branch.prerequisite().position()), + "Expected expression of boolean type but its type is %s", + print_type(TREE_TYPE(this->current_expression)).c_str()); + this->current_expression = error_mark_node; + return; + } + tree then_label_decl = build_label_decl("then", UNKNOWN_LOCATION); + tree goto_then = build1(GOTO_EXPR, void_type_node, then_label_decl); + + tree else_label_decl = build_label_decl("else", UNKNOWN_LOCATION); + tree goto_else = build1(GOTO_EXPR, void_type_node, else_label_decl); + + auto cond_expr = build3(COND_EXPR, void_type_node, this->current_expression, goto_then, goto_else); + append_statement(cond_expr); + + tree then_label_expr = build1(LABEL_EXPR, void_type_node, then_label_decl); + append_statement(then_label_expr); + enter_scope(); + + visit_statements(branch.statements); + tree mapping = leave_scope(); + append_statement(mapping); + append_statement(goto_endif); + + tree else_label_expr = build1(LABEL_EXPR, void_type_node, else_label_decl); + append_statement(else_label_expr); + } + + void generic_visitor::visit(frontend::import_declaration *) + { + } + + void generic_visitor::visit(frontend::while_statement *statement) + { + location_t prerequisite_location = get_location(&statement->body().prerequisite().position()); + tree prerequisite_label_decl = build_label_decl("while_do", prerequisite_location); + auto prerequisite_label_expr = build1_loc(prerequisite_location, LABEL_EXPR, + void_type_node, prerequisite_label_decl); + auto goto_check = build1(GOTO_EXPR, void_type_node, prerequisite_label_decl); + tree branch_end_declaration = build_label_decl("while_end", UNKNOWN_LOCATION); + tree branch_end_expression = build1_loc(UNKNOWN_LOCATION, LABEL_EXPR, void_type_node, branch_end_declaration); + + append_statement(prerequisite_label_expr); + make_if_branch(statement->body(), goto_check); + + for (const auto branch : statement->branches) + { + make_if_branch(*branch, goto_check); + } + append_statement(branch_end_expression); + this->current_expression = NULL_TREE; + } + + void generic_visitor::visit_statements(const std::vector& statements) + { + for (frontend::statement *const statement : statements) + { + statement->accept(this); + + if (this->current_expression != NULL_TREE && this->current_expression != error_mark_node) + { + append_statement(this->current_expression); + this->current_expression = NULL_TREE; + } + } + } + + void generic_visitor::visit(frontend::return_statement *statement) + { + frontend::expression *return_expression = &statement->return_expression(); + location_t statement_position = get_location(&statement->position()); + tree set_result{ NULL_TREE }; + tree return_type = TREE_TYPE(TREE_TYPE(current_function_decl)); + + if (TREE_THIS_VOLATILE(current_function_decl) == 1) + { + error_at(statement_position, "This procedure is not allowed to return"); + return; + } + if (return_expression != nullptr) + { + return_expression->accept(this); + + set_result = build2(INIT_EXPR, void_type_node, DECL_RESULT(current_function_decl), + this->current_expression); + } + if (return_type == void_type_node && set_result != NULL_TREE) + { + error_at(statement_position, "Proper procedure is not allowed to return a value"); + } + else if (return_type != void_type_node && set_result == NULL_TREE) + { + error_at(statement_position, "Procedure is expected to return a value of type '%s'", + print_type(return_type).c_str()); + } + else if (return_type != void_type_node && !is_assignable_from(return_type, this->current_expression)) + { + error_at(statement_position, "Cannot return '%s' from a procedure returning '%s'", + print_type(return_type).c_str(), + print_type(TREE_TYPE(this->current_expression)).c_str()); + } + else + { + tree return_stmt = build1_loc(statement_position, RETURN_EXPR, void_type_node, set_result); + append_statement(return_stmt); + } + this->current_expression = NULL_TREE; + } + + void generic_visitor::visit(frontend::defer_statement *statement) + { + enter_scope(); + visit_statements(statement->statements); + defer(leave_scope()); + } + + void generic_visitor::visit(frontend::case_statement *statement) + { + statement->condition().accept(this); + tree condition_expression = this->current_expression; + tree unqualified_condition = get_qualified_type(TREE_TYPE(this->current_expression), TYPE_UNQUALIFIED); + + if (!INTEGRAL_TYPE_P(unqualified_condition)) + { + error_at(get_location(&statement->condition().position()), + "Case expressions can only be integral numbers, characters and enumerations, given '%s'", + print_type(unqualified_condition).c_str()); + this->current_expression = NULL_TREE; + return; + } + tree end_label_declaration = create_artificial_label(get_location(&statement->position())); + tree switch_statements = alloc_stmt_list(); + + for (const frontend::switch_case& case_block : statement->cases) + { + for (frontend::expression *const case_label : case_block.labels) + { + case_label->accept(this); + location_t case_location = get_location(&case_label->position()); + + if (assert_constant(case_location) + && !is_assignable_from(unqualified_condition, this->current_expression)) + { + error_at(case_location, "Case type '%s' does not match the expression type '%s'", + print_type(TREE_TYPE(this->current_expression)).c_str(), + print_type(unqualified_condition).c_str()); + this->current_expression = error_mark_node; + } + tree case_label_declaration = create_artificial_label(case_location); + tree case_expression = build_case_label(this->current_expression, NULL_TREE, case_label_declaration); + + append_to_statement_list(case_expression, &switch_statements); + } + enter_scope(); + visit_statements(case_block.statements); + append_to_statement_list(leave_scope(), &switch_statements); + tree goto_end = build1(GOTO_EXPR, void_type_node, end_label_declaration); + + append_to_statement_list(goto_end, &switch_statements); + TREE_USED(end_label_declaration) = 1; + } + if (statement->alternative != nullptr) + { + tree case_label_declaration = create_artificial_label(UNKNOWN_LOCATION); + tree case_expression = build_case_label(NULL_TREE, NULL_TREE, case_label_declaration); + + append_to_statement_list(case_expression, &switch_statements); + + enter_scope(); + visit_statements(*statement->alternative); + append_to_statement_list(leave_scope(), &switch_statements); + + TREE_USED(end_label_declaration) = 1; + } + tree switch_expression = build2(SWITCH_EXPR, TREE_TYPE(condition_expression), + condition_expression, switch_statements); + + append_statement(switch_expression); + + tree end_label_expression = build1(LABEL_EXPR, void_type_node, end_label_declaration); + append_statement(end_label_expression); + + this->current_expression = NULL_TREE; + } + + bool generic_visitor::assert_constant(location_t expression_location) + { + tree constant_expression = extract_constant(this->current_expression); + + if (constant_expression == NULL_TREE) + { + error_at(expression_location, "Expected a constant expression"); + this->current_expression = error_mark_node; + } + else + { + this->current_expression = constant_expression; + } + return this->current_expression != error_mark_node; + } +} diff --git a/gcc/elna-spec.cc b/gcc/elna-spec.cc new file mode 100644 index 0000000..5d1ace1 --- /dev/null +++ b/gcc/elna-spec.cc @@ -0,0 +1,31 @@ +/* Specific flags and argument handling of the Elna front end. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +void lang_specific_driver(struct cl_decoded_option ** /* in_decoded_options */, + unsigned int * /* in_decoded_options_count */, + int * /*in_added_libraries */) +{ +} + +/* Called before linking. Returns 0 on success and -1 on failure. */ +int lang_specific_pre_link(void) +{ + return 0; +} + +/* Number of extra output files that lang_specific_pre_link may generate. */ +int lang_specific_extra_outfiles = 0; diff --git a/gcc/elna-tree.cc b/gcc/elna-tree.cc new file mode 100644 index 0000000..93f796b --- /dev/null +++ b/gcc/elna-tree.cc @@ -0,0 +1,315 @@ +/* Utilities to manipulate GCC trees. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "elna/gcc/elna-tree.h" +#include "elna/gcc/elna-diagnostic.h" +#include "elna/gcc/elna1.h" + +#include "function.h" +#include "stor-layout.h" +#include "diagnostic-core.h" + +namespace elna::gcc +{ + bool is_integral_type(tree type) + { + gcc_assert(TYPE_P(type)); + return TREE_CODE(type) == INTEGER_TYPE && type != elna_char_type_node; + } + + bool is_numeric_type(tree type) + { + return is_integral_type(type) || type == elna_float_type_node; + } + + bool is_unique_type(tree type) + { + gcc_assert(TYPE_P(type)); + return RECORD_OR_UNION_TYPE_P(type) || TREE_CODE(type) == ENUMERAL_TYPE; + } + + bool is_void_type(tree type) + { + return type == NULL_TREE || type == void_type_node; + } + + bool is_castable_type(tree type) + { + gcc_assert(TYPE_P(type)); + return INTEGRAL_TYPE_P(type) || POINTER_TYPE_P(type) || TREE_CODE(type) == REAL_TYPE; + } + + bool are_compatible_pointers(tree lhs_type, tree rhs) + { + gcc_assert(TYPE_P(lhs_type)); + tree rhs_type = TREE_TYPE(rhs); + + return (POINTER_TYPE_P(lhs_type) && rhs == elna_pointer_nil_node) + || (POINTER_TYPE_P(lhs_type) && lhs_type == rhs_type); + } + + tree prepare_rvalue(tree rvalue) + { + if (DECL_P(rvalue) && TREE_CODE(TREE_TYPE(rvalue)) == FUNCTION_TYPE) + { + return build1(ADDR_EXPR, build_pointer_type_for_mode(TREE_TYPE(rvalue), VOIDmode, true), rvalue); + } + else + { + return rvalue; + } + } + + bool is_assignable_from(tree assignee, tree assignment) + { + return get_qualified_type(TREE_TYPE(assignment), TYPE_UNQUALIFIED) == assignee + || are_compatible_pointers(assignee, assignment); + } + + void append_statement(tree statement_tree) + { + if (!vec_safe_is_empty(f_binding_level->defers)) + { + append_to_statement_list(statement_tree, &f_binding_level->defers->begin()->try_statements); + } + else + { + append_to_statement_list(statement_tree, &f_binding_level->statement_list); + } + } + + void defer(tree statement_tree) + { + defer_scope new_defer{ statement_tree, alloc_stmt_list() }; + vec_safe_insert(f_binding_level->defers, 0, new_defer); + } + + tree chain_defer() + { + if (vec_safe_is_empty(f_binding_level->defers)) + { + return f_binding_level->statement_list; + } + defer_scope *defer_iterator = f_binding_level->defers->begin(); + tree defer_tree = build2(TRY_FINALLY_EXPR, void_type_node, + defer_iterator->try_statements, defer_iterator->defer_block); + int i; + + FOR_EACH_VEC_ELT_FROM(*f_binding_level->defers, i, defer_iterator, 1) + { + append_to_statement_list(defer_tree, &defer_iterator->try_statements); + defer_tree = build2(TRY_FINALLY_EXPR, void_type_node, + defer_iterator->try_statements, defer_iterator->defer_block); + } + return build2(COMPOUND_EXPR, TREE_TYPE(defer_tree), f_binding_level->statement_list, defer_tree); + } + + tree build_field(location_t location, tree record_type, const std::string name, tree type) + { + tree field_declaration = build_decl(location, + FIELD_DECL, get_identifier(name.c_str()), type); + TREE_ADDRESSABLE(field_declaration) = 1; + DECL_CONTEXT(field_declaration) = record_type; + + return field_declaration; + } + + tree do_pointer_arithmetic(frontend::binary_operator binary_operator, + tree left, tree right, location_t operation_location) + { + tree left_type = get_qualified_type(TREE_TYPE(left), TYPE_UNQUALIFIED); + tree right_type = get_qualified_type(TREE_TYPE(right), TYPE_UNQUALIFIED); + if (binary_operator == frontend::binary_operator::sum) + { + tree pointer{ NULL_TREE }; + tree offset{ NULL_TREE }; + tree pointer_type{ NULL_TREE }; + + if (POINTER_TYPE_P(left_type) && is_integral_type(right_type)) + { + pointer = left; + offset = right; + pointer_type = left_type; + } + else if (is_integral_type(left_type) && POINTER_TYPE_P(right_type)) + { + pointer = right; + offset = left; + pointer_type = right_type; + } + else + { + return error_mark_node; + } + tree size_exp = pointer_type == elna_pointer_type_node + ? size_one_node + : fold_convert(TREE_TYPE(offset), size_in_bytes(TREE_TYPE(TREE_TYPE(pointer)))); + + offset = fold_build2(MULT_EXPR, TREE_TYPE(offset), offset, size_exp); + offset = fold_convert(sizetype, offset); + + return fold_build2_loc(operation_location, POINTER_PLUS_EXPR, TREE_TYPE(pointer), pointer, offset); + } + else if (binary_operator == frontend::binary_operator::subtraction) + { + if (POINTER_TYPE_P(left_type) && is_integral_type(right_type)) + { + tree pointer_type = left_type; + tree offset_type = right_type; + tree size_exp = fold_convert(offset_type, size_in_bytes(TREE_TYPE(pointer_type))); + + tree convert_expression = fold_build2(MULT_EXPR, offset_type, right, size_exp); + convert_expression = fold_convert(sizetype, convert_expression); + + convert_expression = fold_build1(NEGATE_EXPR, sizetype, convert_expression); + return fold_build2_loc(operation_location, POINTER_PLUS_EXPR, pointer_type, left, convert_expression); + } + else if (POINTER_TYPE_P(left_type) && POINTER_TYPE_P(right_type) && left_type == right_type) + { + return fold_build2_loc(operation_location, POINTER_DIFF_EXPR, ssizetype, left, right); + } + } + gcc_unreachable(); + } + + tree build_binary_operation(bool condition, frontend::binary_expression *expression, + tree_code operator_code, tree left, tree right, tree target_type) + { + location_t expression_location = get_location(&expression->position()); + tree left_type = get_qualified_type(TREE_TYPE(left), TYPE_UNQUALIFIED); + tree right_type = get_qualified_type(TREE_TYPE(right), TYPE_UNQUALIFIED); + + if (condition) + { + return fold_build2_loc(expression_location, operator_code, target_type, left, right); + } + else + { + error_at(expression_location, + "invalid operands of type '%s' and '%s' for operator %s", + print_type(left_type).c_str(), print_type(right_type).c_str(), + elna::frontend::print_binary_operator(expression->operation())); + return error_mark_node; + } + } + + tree find_field_by_name(location_t expression_location, tree type, const std::string& field_name) + { + if (type == error_mark_node) + { + return type; + } + tree field_declaration = TYPE_FIELDS(type); + + if (!RECORD_OR_UNION_TYPE_P(type)) + { + error_at(expression_location, "Type '%s' does not have a field named '%s'", + print_type(type).c_str(), field_name.c_str()); + return error_mark_node; + } + while (field_declaration != NULL_TREE) + { + tree declaration_name = DECL_NAME(field_declaration); + const char *identifier_pointer = IDENTIFIER_POINTER(declaration_name); + + if (field_name == identifier_pointer) + { + break; + } + field_declaration = TREE_CHAIN(field_declaration); + } + if (field_declaration == NULL_TREE) + { + error_at(expression_location, "Aggregate type does not have a field '%s'", field_name.c_str()); + return error_mark_node; + } + return field_declaration; + } + + tree build_global_pointer_type(tree type) + { + return build_pointer_type_for_mode(type, VOIDmode, true); + } + + tree build_static_array_type(tree type, const std::uint64_t size) + { + tree upper_bound = build_int_cst_type(integer_type_node, size); + tree range_type = build_range_type(integer_type_node, size_one_node, upper_bound); + + return build_array_type(type, range_type); + } + + tree build_enumeration_type(const std::vector& members) + { + tree composite_type_node = make_node(ENUMERAL_TYPE); + const tree base_type = integer_type_node; + + TREE_TYPE(composite_type_node) = base_type; + ENUM_IS_SCOPED(composite_type_node) = 1; + + tree *pp = &TYPE_VALUES(composite_type_node); + std::size_t order{ 1 }; + + for (const std::string& member : members) + { + tree member_name = get_identifier(member.c_str()); + tree member_declaration = build_decl(UNKNOWN_LOCATION, CONST_DECL, member_name, composite_type_node); + + DECL_CONTEXT(member_declaration) = composite_type_node; + DECL_INITIAL(member_declaration) = build_int_cst_type(composite_type_node, order++); + TREE_CONSTANT(member_declaration) = 1; + TREE_READONLY(member_declaration) = 1; + + TYPE_MAX_VALUE(composite_type_node) = DECL_INITIAL(member_declaration); + + *pp = build_tree_list(member_name, member_declaration); + pp = &TREE_CHAIN(*pp); + } + TYPE_MIN_VALUE(composite_type_node) = DECL_INITIAL(TREE_VALUE(TYPE_VALUES(composite_type_node))); + TYPE_UNSIGNED(composite_type_node) = TYPE_UNSIGNED(base_type); + SET_TYPE_ALIGN(composite_type_node, TYPE_ALIGN(base_type)); + TYPE_SIZE(composite_type_node) = NULL_TREE; + TYPE_PRECISION(composite_type_node) = TYPE_PRECISION(base_type); + + layout_type(composite_type_node); + return composite_type_node; + } + + tree build_label_decl(const char *name, location_t loc) + { + auto label_decl = build_decl(loc, LABEL_DECL, get_identifier(name), void_type_node); + + DECL_CONTEXT(label_decl) = current_function_decl; + + return label_decl; + } + + tree extract_constant(tree expression) + { + int code = TREE_CODE(expression); + + if (code == CONST_DECL) + { + return DECL_INITIAL(expression); + } + else if (TREE_CODE_CLASS(code) == tcc_constant) + { + return expression; + } + return NULL_TREE; + } +} diff --git a/gcc/elna1.cc b/gcc/elna1.cc new file mode 100644 index 0000000..448a24c --- /dev/null +++ b/gcc/elna1.cc @@ -0,0 +1,256 @@ +/* Language-dependent hooks for Elna. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "function.h" +#include "tree.h" +#include "elna/gcc/elna1.h" +#include "diagnostic.h" +#include "opts.h" +#include "debug.h" +#include "langhooks.h" +#include "langhooks-def.h" + +#include +#include "elna/frontend/dependency.h" +#include "elna/gcc/elna-tree.h" +#include "elna/gcc/elna-generic.h" +#include "elna/gcc/elna-diagnostic.h" +#include "elna/gcc/elna-builtins.h" + +tree elna_global_trees[ELNA_TI_MAX]; +hash_map *elna_global_decls = nullptr; + +/* The resulting tree type. */ + +union GTY ((desc("TREE_CODE (&%h.generic) == IDENTIFIER_NODE"), + chain_next("CODE_CONTAINS_STRUCT (TREE_CODE (&%h.generic), " + "TS_COMMON) ? ((union lang_tree_node *) TREE_CHAIN " + "(&%h.generic)) : NULL"))) lang_tree_node +{ + union tree_node GTY ((tag ("0"), desc ("tree_node_structure (&%h)"))) generic; +}; + +/* Language hooks. */ + +static bool elna_langhook_init(void) +{ + build_common_tree_nodes(false); + + elna::gcc::init_ttree(); + elna_global_decls = hash_map::create_ggc(default_hash_map_size); + + build_common_builtin_nodes(); + + return true; +} + +using dependency_state = elna::frontend::dependency_state>; + +static elna::frontend::dependency elna_parse_file(dependency_state& state, const char *filename) +{ + std::ifstream entry_point{ filename, std::ios::in }; + + if (!entry_point) + { + fatal_error(UNKNOWN_LOCATION, "Cannot open filename %s: %m", filename); + } + elna::gcc::linemap_guard{ filename }; + elna::frontend::dependency outcome = elna::frontend::read_source(entry_point, filename); + + if (outcome.has_errors()) + { + elna::gcc::report_errors(outcome.errors()); + return outcome; + } + elna::frontend::symbol_bag outcome_bag = elna::frontend::symbol_bag{ std::move(outcome.unresolved), state.globals }; + + for (const auto& sub_tree : outcome.tree->imports) + { + std::filesystem::path sub_path = "source" / elna::frontend::build_path(sub_tree->segments); + std::unordered_map::const_iterator cached_import = + state.cache.find(sub_path); + + if (cached_import == state.cache.end()) + { + elna_parse_file(state, sub_path.c_str()); + cached_import = state.cache.find(sub_path); + } + outcome_bag.add_import(cached_import->second); + } + outcome.errors() = analyze_semantics(filename, outcome.tree, outcome_bag); + + if (outcome.has_errors()) + { + elna::gcc::report_errors(outcome.errors()); + return outcome; + } + state.cache.insert({ filename, outcome_bag }); + elna::gcc::rewrite_symbol_table(outcome_bag.leave(), state.custom); + + return outcome; +} + +static void elna_langhook_parse_file(void) +{ + dependency_state state{ elna::gcc::builtin_symbol_table() }; + + for (unsigned int i = 0; i < num_in_fnames; i++) + { + elna::frontend::dependency outcome = elna_parse_file(state, in_fnames[i]); + + linemap_add(line_table, LC_ENTER, 0, in_fnames[i], 1); + elna::gcc::generic_visitor generic_visitor{ state.custom, state.cache.find(in_fnames[i])->second }; + outcome.tree->accept(&generic_visitor); + linemap_add(line_table, LC_LEAVE, 0, NULL, 0); + } +} + +static tree elna_langhook_type_for_mode(enum machine_mode mode, int unsignedp) +{ + if (mode == TYPE_MODE(float_type_node)) + { + return float_type_node; + } + else if (mode == TYPE_MODE(double_type_node)) + { + return double_type_node; + } + if (mode == TYPE_MODE(intQI_type_node)) + { + return unsignedp ? unsigned_intQI_type_node : intQI_type_node; + } + else if (mode == TYPE_MODE(intHI_type_node)) + { + return unsignedp ? unsigned_intHI_type_node : intHI_type_node; + } + else if (mode == TYPE_MODE(intSI_type_node)) + { + return unsignedp ? unsigned_intSI_type_node : intSI_type_node; + } + else if (mode == TYPE_MODE(intDI_type_node)) + { + return unsignedp ? unsigned_intDI_type_node : intDI_type_node; + } + else if (mode == TYPE_MODE(intTI_type_node)) + { + return unsignedp ? unsigned_intTI_type_node : intTI_type_node; + } + else if (mode == TYPE_MODE(integer_type_node)) + { + return unsignedp ? unsigned_type_node : integer_type_node; + } + else if (mode == TYPE_MODE(long_integer_type_node)) + { + return unsignedp ? long_unsigned_type_node : long_integer_type_node; + } + else if (mode == TYPE_MODE(long_long_integer_type_node)) + { + return unsignedp + ? long_long_unsigned_type_node + : long_long_integer_type_node; + } + if (COMPLEX_MODE_P(mode)) + { + if (mode == TYPE_MODE(complex_float_type_node)) + { + return complex_float_type_node; + } + if (mode == TYPE_MODE(complex_double_type_node)) + { + return complex_double_type_node; + } + if (mode == TYPE_MODE(complex_long_double_type_node)) + { + return complex_long_double_type_node; + } + if (mode == TYPE_MODE(complex_integer_type_node) && !unsignedp) + { + return complex_integer_type_node; + } + } + /* gcc_unreachable */ + return nullptr; +} + +static bool global_bindings_p(void) +{ + return current_function_decl == NULL_TREE; +} + +static tree pushdecl(tree decl) +{ + return decl; +} + +static tree elna_langhook_builtin_function(tree decl) +{ + elna_global_decls->put(IDENTIFIER_POINTER(DECL_NAME(decl)), decl); + return decl; +} + +static unsigned int elna_langhook_option_lang_mask(void) +{ + return CL_Elna; +} + +/* Creates an expression whose value is that of EXPR, converted to type TYPE. + This function implements all reasonable scalar conversions. */ +tree convert(tree type, tree expr) +{ + if (error_operand_p(type) || error_operand_p(expr)) + { + return error_mark_node; + } + if (TREE_TYPE(expr) == type) + { + return expr; + } + return error_mark_node; +} + +#undef LANG_HOOKS_NAME +#define LANG_HOOKS_NAME "GNU Elna" + +#undef LANG_HOOKS_INIT +#define LANG_HOOKS_INIT elna_langhook_init + +#undef LANG_HOOKS_PARSE_FILE +#define LANG_HOOKS_PARSE_FILE elna_langhook_parse_file + +#undef LANG_HOOKS_TYPE_FOR_MODE +#define LANG_HOOKS_TYPE_FOR_MODE elna_langhook_type_for_mode + +#undef LANG_HOOKS_GETDECLS +#define LANG_HOOKS_GETDECLS hook_tree_void_null + +#undef LANG_HOOKS_BUILTIN_FUNCTION +#define LANG_HOOKS_BUILTIN_FUNCTION elna_langhook_builtin_function + +#undef LANG_HOOKS_IDENTIFIER_SIZE +#define LANG_HOOKS_IDENTIFIER_SIZE sizeof(struct tree_identifier) + +#undef LANG_HOOKS_OPTION_LANG_MASK +#define LANG_HOOKS_OPTION_LANG_MASK elna_langhook_option_lang_mask + +struct lang_hooks lang_hooks = LANG_HOOKS_INITIALIZER; + +#include "gt-elna-elna1.h" +#include "gtype-elna.h" diff --git a/gcc/gelna.texi b/gcc/gelna.texi new file mode 100644 index 0000000..e4bc6ce --- /dev/null +++ b/gcc/gelna.texi @@ -0,0 +1,135 @@ +\input texinfo @c -*-texinfo-*- +@setfilename gelna.info +@settitle The GNU Elna Compiler + +@c Create a separate index for command line options +@defcodeindex op +@c Merge the standard indexes into a single one. +@syncodeindex fn cp +@syncodeindex vr cp +@syncodeindex ky cp +@syncodeindex pg cp +@syncodeindex tp cp + +@include gcc-common.texi + +@c Copyright years for this manual. +@set copyrights-elna 2025 + +@copying +@c man begin COPYRIGHT +Copyright @copyright{} @value{copyrights-elna} Free Software Foundation, Inc. + +Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version 1.3 or +any later version published by the Free Software Foundation; with no +Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. +A copy of the license is included in the +@c man end +section entitled ``GNU Free Documentation License''. +@ignore +@c man begin COPYRIGHT +man page gfdl(7). +@c man end +@end ignore +@end copying + +@ifinfo +@format +@dircategory Software development +@direntry +* Gelna: (gelna). A GCC-based compiler for the Elna language +@end direntry +@end format + +@insertcopying +@end ifinfo + +@titlepage +@title The GNU Elna Compiler +@versionsubtitle +@author Eugen Wissner + +@page +@vskip 0pt plus 1filll +Published by the Free Software Foundation @* +51 Franklin Street, Fifth Floor@* +Boston, MA 02110-1301, USA@* +@sp 1 +@insertcopying +@end titlepage +@contents +@page + +@node Top +@top Introduction + +This manual describes how to use @command{gelna}, the GNU compiler for +the Elna programming language. This manual is specifically about how to +invoke @command{gelna}. + +@menu +* Copying:: The GNU General Public License. +* GNU Free Documentation License:: + How you can share and copy this manual. +* Invoking gelna:: How to run gelna. +* Option Index:: Index of command line options. +* Keyword Index:: Index of concepts. +@end menu + + +@include gpl_v3.texi + +@include fdl.texi + + +@node Invoking gelna +@chapter Invoking gelna + +@c man title gelna A GCC-based compiler for the Elna language + +@ignore +@c man begin SYNOPSIS gelna +gelna [@option{-c}|@option{-S}] + [@option{-g}] [@option{-pg}] + [@option{-o} @var{outfile}] @var{infile}@dots{} + +Only the most useful options are listed here; see below for the +remainder. +@c man end +@c man begin SEEALSO +gpl(7), gfdl(7), fsf-funding(7), gcc(1) +and the Info entries for @file{gelna} and @file{gcc}. +@c man end +@end ignore + +@c man begin DESCRIPTION gelna + +The @command{gelna} command is a frontend to @command{gcc} and +supports many of the same options. @xref{Option Summary, , Option +Summary, gcc, Using the GNU Compiler Collection (GCC)}. This manual +only documents the options specific to @command{gelna}. + +@c man end + +@c man begin OPTIONS gelna + +@c man end + +@node Option Index +@unnumbered Option Index + +@command{gelna}'s command line options are indexed here without any +initial @samp{-} or @samp{--}. Where an option has both positive and +negative forms (such as -foption and -fno-option), relevant entries in +the manual are indexed under the most appropriate form; it may sometimes +be useful to look up both forms. + +@printindex op + +@node Keyword Index +@unnumbered Keyword Index + +@printindex cp + +@bye diff --git a/gcc/lang-specs.h b/gcc/lang-specs.h new file mode 100644 index 0000000..ac3611d --- /dev/null +++ b/gcc/lang-specs.h @@ -0,0 +1,28 @@ +/* GCC driver specs for Elna frontend. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* gcc/gcc.cc */ +{".elna", "@elna", nullptr, 0, 0}, +{"@elna", + "elna1 %i \ + %{!Q:-quiet} " DUMPS_OPTIONS("") " %{m*} %{aux-info*} \ + %{g*} %{O*} %{W*&pedantic*} %{w} %{std*&ansi&trigraphs} \ + %{pg:-p} %{p} %{f*} %{undef} \ + %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %w%b.s}}} \ + %{fsyntax-only:-o %j} %{-param*} \ + %{!fsyntax-only:%(invoke_as)}", + nullptr, 0, 0}, diff --git a/gcc/lang.opt b/gcc/lang.opt new file mode 100644 index 0000000..1c8c95c --- /dev/null +++ b/gcc/lang.opt @@ -0,0 +1,23 @@ +; lang.opt -- Options for the Elna front end. +; Copyright (C) 2025 Free Software Foundation, Inc. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual for a description of this file's format. + +; Please try to keep this file in ASCII collating order. + +Language +Elna diff --git a/gcc/lang.opt.urls b/gcc/lang.opt.urls new file mode 100644 index 0000000..a383952 --- /dev/null +++ b/gcc/lang.opt.urls @@ -0,0 +1,2 @@ +; Autogenerated by regenerate-opt-urls.py from gcc/lang.opt and generated HTML + diff --git a/include/elna/frontend/ast.h b/include/elna/frontend/ast.h new file mode 100644 index 0000000..bbb8a36 --- /dev/null +++ b/include/elna/frontend/ast.h @@ -0,0 +1,815 @@ +/* Abstract syntax tree representation. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +#include +#include +#include +#include +#include +#include "elna/frontend/symbol.h" +#include "elna/frontend/result.h" + +namespace elna::frontend +{ + enum class binary_operator + { + sum, + subtraction, + multiplication, + division, + remainder, + equals, + not_equals, + less, + greater, + less_equal, + greater_equal, + disjunction, + conjunction, + exclusive_disjunction, + shift_left, + shift_right + }; + + enum class unary_operator + { + reference, + negation, + minus + }; + + class variable_declaration; + class constant_declaration; + class procedure_declaration; + class type_declaration; + class procedure_call; + class cast_expression; + class assign_statement; + class if_statement; + class import_declaration; + class while_statement; + class return_statement; + class case_statement; + class traits_expression; + class unit; + class program; + class binary_expression; + class unary_expression; + class named_type_expression; + class array_type_expression; + class pointer_type_expression; + class record_type_expression; + class union_type_expression; + class procedure_type_expression; + class enumeration_type_expression; + class variable_expression; + class array_access_expression; + class field_access_expression; + class dereference_expression; + class designator_expression; + class literal_expression; + template + class literal; + class defer_statement; + + /** + * Interface for AST visitors. + */ + struct parser_visitor + { + virtual void visit(variable_declaration *) = 0; + virtual void visit(constant_declaration *) = 0; + virtual void visit(procedure_declaration *) = 0; + virtual void visit(type_declaration *) = 0; + virtual void visit(procedure_call *) = 0; + virtual void visit(cast_expression *) = 0; + virtual void visit(traits_expression *) = 0; + virtual void visit(assign_statement *) = 0; + virtual void visit(if_statement *) = 0; + virtual void visit(import_declaration *) = 0; + virtual void visit(while_statement *) = 0; + virtual void visit(return_statement *) = 0; + virtual void visit(defer_statement *) = 0; + virtual void visit(case_statement *) = 0; + virtual void visit(unit *) = 0; + virtual void visit(program *) = 0; + virtual void visit(binary_expression *) = 0; + virtual void visit(unary_expression *) = 0; + virtual void visit(named_type_expression *) = 0; + virtual void visit(array_type_expression *) = 0; + virtual void visit(pointer_type_expression *) = 0; + virtual void visit(record_type_expression *) = 0; + virtual void visit(union_type_expression *) = 0; + virtual void visit(procedure_type_expression *) = 0; + virtual void visit(enumeration_type_expression *) = 0; + virtual void visit(variable_expression *) = 0; + virtual void visit(array_access_expression *) = 0; + virtual void visit(field_access_expression *) = 0; + virtual void visit(dereference_expression *) = 0; + virtual void visit(literal *) = 0; + virtual void visit(literal *) = 0; + virtual void visit(literal *) = 0; + virtual void visit(literal *) = 0; + virtual void visit(literal *) = 0; + virtual void visit(literal *) = 0; + virtual void visit(literal *) = 0; + }; + + /** + * Abstract visitor that doesn't visit any nodes by default. + */ + class empty_visitor : public parser_visitor + { + [[noreturn]] void not_implemented(); + + public: + [[noreturn]] virtual void visit(named_type_expression *) override; + [[noreturn]] virtual void visit(array_type_expression *) override; + [[noreturn]] virtual void visit(pointer_type_expression *) override; + [[noreturn]] virtual void visit(program *) override; + [[noreturn]] virtual void visit(type_declaration *) override; + [[noreturn]] virtual void visit(record_type_expression *) override; + [[noreturn]] virtual void visit(union_type_expression *) override; + [[noreturn]] virtual void visit(procedure_type_expression *) override; + [[noreturn]] virtual void visit(enumeration_type_expression *) override; + + [[noreturn]] virtual void visit(variable_declaration *) override; + [[noreturn]] virtual void visit(constant_declaration *) override; + [[noreturn]] virtual void visit(procedure_declaration *) override; + [[noreturn]] virtual void visit(assign_statement *) override; + [[noreturn]] virtual void visit(if_statement *) override; + [[noreturn]] virtual void visit(import_declaration *) override; + [[noreturn]] virtual void visit(while_statement *) override; + [[noreturn]] virtual void visit(return_statement *) override; + [[noreturn]] virtual void visit(defer_statement *) override; + [[noreturn]] virtual void visit(case_statement *) override; + [[noreturn]] virtual void visit(procedure_call *) override; + [[noreturn]] virtual void visit(unit *) override; + [[noreturn]] virtual void visit(cast_expression *) override; + [[noreturn]] virtual void visit(traits_expression *) override; + [[noreturn]] virtual void visit(binary_expression *) override; + [[noreturn]] virtual void visit(unary_expression *) override; + [[noreturn]] virtual void visit(variable_expression *) override; + [[noreturn]] virtual void visit(array_access_expression *) override; + [[noreturn]] virtual void visit(field_access_expression *) override; + [[noreturn]] virtual void visit(dereference_expression *) override; + [[noreturn]] virtual void visit(literal *) override; + [[noreturn]] virtual void visit(literal *) override; + [[noreturn]] virtual void visit(literal *) override; + [[noreturn]] virtual void visit(literal *) override; + [[noreturn]] virtual void visit(literal *) override; + [[noreturn]] virtual void visit(literal *) override; + [[noreturn]] virtual void visit(literal *) override; + }; + + /** + * AST node. + */ + class node + { + const struct position source_position; + + protected: + /** + * \param position Source code position. + */ + explicit node(const position position); + + public: + virtual void accept(parser_visitor *visitor) = 0; + virtual ~node() = 0; + + /** + * \return Node position in the source code. + */ + const struct position& position() const; + }; + + class statement : public virtual node + { + }; + + class expression : public virtual node + { + public: + virtual cast_expression *is_cast(); + virtual traits_expression *is_traits(); + virtual binary_expression *is_binary(); + virtual unary_expression *is_unary(); + virtual designator_expression *is_designator(); + virtual procedure_call *is_call_expression(); + virtual literal_expression *is_literal(); + }; + + /** + * Symbol definition. + */ + class declaration : public node + { + protected: + declaration(const struct position position, identifier_definition identifier); + + public: + const identifier_definition identifier; + }; + + /** + * Some type expression. + */ + class type_expression : public node + { + public: + virtual named_type_expression *is_named(); + virtual array_type_expression *is_array(); + virtual pointer_type_expression *is_pointer(); + virtual record_type_expression *is_record(); + virtual union_type_expression *is_union(); + virtual procedure_type_expression *is_procedure(); + virtual enumeration_type_expression *is_enumeration(); + + protected: + type_expression(const struct position position); + }; + + /** + * Expression refering to a type by its name. + */ + class named_type_expression : public type_expression + { + public: + const std::string name; + + named_type_expression(const struct position position, const std::string& name); + void accept(parser_visitor *visitor) override; + named_type_expression *is_named() override; + }; + + class array_type_expression : public type_expression + { + type_expression *m_base; + + public: + const std::uint32_t size; + + array_type_expression(const struct position position, + type_expression *base, const std::uint32_t size); + ~array_type_expression(); + + void accept(parser_visitor *visitor) override; + array_type_expression *is_array() override; + + type_expression& base(); + }; + + class pointer_type_expression : public type_expression + { + type_expression *m_base; + + public: + pointer_type_expression(const struct position position, type_expression *base); + ~pointer_type_expression(); + + void accept(parser_visitor *visitor) override; + pointer_type_expression *is_pointer() override; + + type_expression& base(); + }; + + using field_declaration = std::pair; + + class record_type_expression : public type_expression + { + public: + const std::vector fields; + + record_type_expression(const struct position position, std::vector&& fields); + ~record_type_expression(); + + void accept(parser_visitor *visitor) override; + record_type_expression *is_record() override; + }; + + class union_type_expression : public type_expression + { + public: + std::vector fields; + + union_type_expression(const struct position position, std::vector&& fields); + ~union_type_expression(); + + void accept(parser_visitor *visitor) override; + union_type_expression *is_union() override; + }; + + /** + * Enumeration type. + */ + class enumeration_type_expression : public type_expression + { + public: + const std::vector members; + + enumeration_type_expression(const struct position, std::vector&& members); + + void accept(parser_visitor *visitor) override; + enumeration_type_expression *is_enumeration() override; + }; + + /** + * Variable declaration. + */ + class variable_declaration : public node + { + std::shared_ptr m_variable_type; + + public: + variable_declaration(const struct position position, + std::vector&& identifier, std::shared_ptr variable_type, + expression *body = nullptr); + variable_declaration(const struct position position, + std::vector&& identifier, std::shared_ptr variable_type, + std::monostate); + + void accept(parser_visitor *visitor) override; + + bool has_initializer() const; + + const std::vector identifiers; + type_expression& variable_type(); + expression *const body{ nullptr }; + const bool is_extern{ false }; + }; + + /** + * Literal expression. + */ + class literal_expression : public expression + { + public: + literal_expression *is_literal() override; + + protected: + literal_expression(); + }; + + /** + * Constant definition. + */ + class constant_declaration : public declaration + { + expression *m_body; + + public: + constant_declaration(const struct position position, identifier_definition identifier, + expression *body); + void accept(parser_visitor *visitor) override; + + expression& body(); + + virtual ~constant_declaration() override; + }; + + /** + * Procedure type. + */ + class procedure_type_expression : public type_expression + { + public: + using return_t = return_declaration; + + const return_t return_type; + std::vector parameters; + + procedure_type_expression(const struct position position, return_t return_type = return_t()); + ~procedure_type_expression(); + + void accept(parser_visitor *visitor) override; + procedure_type_expression *is_procedure() override; + }; + + struct block + { + block(std::vector&& constants, std::vector&& variables, + std::vector&& body); + block(const block&) = delete; + block(block&& that); + + block& operator=(const block&) = delete; + block& operator=(block&& that); + + const std::vector& variables(); + const std::vector& constants(); + const std::vector& body(); + + virtual ~block(); + + private: + std::vector m_variables; + std::vector m_constants; + std::vector m_body; + + }; + + /** + * Procedure definition. + */ + class procedure_declaration : public declaration + { + procedure_type_expression *m_heading; + + public: + std::optional body; + std::vector parameter_names; + + procedure_declaration(const struct position position, identifier_definition identifier, + procedure_type_expression *heading, block&& body); + procedure_declaration(const struct position position, identifier_definition identifier, + procedure_type_expression *heading); + void accept(parser_visitor *visitor) override; + + procedure_type_expression& heading(); + + virtual ~procedure_declaration() override; + }; + + /** + * Type definition. + */ + class type_declaration : public declaration + { + type_expression *m_body; + + public: + type_declaration(const struct position position, identifier_definition identifier, + type_expression *expression); + ~type_declaration(); + + void accept(parser_visitor *visitor) override; + + type_expression& body(); + }; + + /** + * Cast expression. + */ + class cast_expression : public expression + { + type_expression *m_target; + expression *m_value; + + public: + type expression_type; + + cast_expression(const struct position position, type_expression *target, expression *value); + void accept(parser_visitor *visitor) override; + cast_expression *is_cast() override; + + type_expression& target(); + expression& value(); + + virtual ~cast_expression() override; + }; + + class traits_expression : public expression + { + public: + std::vector parameters; + const std::string name; + std::vector types; + + traits_expression(const struct position position, const std::string& name); + ~traits_expression(); + + void accept(parser_visitor *visitor) override; + traits_expression *is_traits() override; + }; + + /** + * List of statements paired with a condition. + */ + class conditional_statements + { + expression *m_prerequisite; + + public: + const std::vector statements; + + conditional_statements(expression *prerequisite, std::vector&& statements); + + expression& prerequisite(); + + virtual ~conditional_statements(); + }; + + class return_statement : public statement + { + public: + expression *m_return_expression; + + return_statement(const struct position position, expression *return_expression); + void accept(parser_visitor *visitor) override; + + expression& return_expression(); + + virtual ~return_statement() override; + }; + + struct switch_case + { + std::vector labels; + std::vector statements; + }; + + class case_statement : public statement + { + expression *m_condition; + + public: + const std::vector cases; + const std::vector *alternative; + + case_statement(const struct position position, expression *condition, + std::vector&& cases, std::vector *alternative = nullptr); + void accept(parser_visitor *visitor) override; + expression& condition(); + }; + + class designator_expression : public expression + { + public: + virtual variable_expression *is_variable(); + virtual array_access_expression *is_array_access(); + virtual field_access_expression *is_field_access(); + virtual dereference_expression *is_dereference(); + + designator_expression *is_designator() override; + void accept(parser_visitor *visitor); + ~designator_expression() = 0; + + protected: + designator_expression(); + }; + + class variable_expression : public designator_expression, public literal_expression + { + public: + const std::string name; + + variable_expression(const struct position position, const std::string& name); + void accept(parser_visitor *visitor) override; + + variable_expression *is_variable() override; + }; + + class array_access_expression : public designator_expression + { + expression *m_base; + expression *m_index; + + public: + array_access_expression(const struct position position, expression *base, expression *index); + void accept(parser_visitor *visitor) override; + + expression& base(); + expression& index(); + + array_access_expression *is_array_access() override; + + ~array_access_expression() override; + }; + + class field_access_expression : public designator_expression + { + expression *m_base; + std::string m_field; + + public: + field_access_expression(const struct position position, expression *base, + const std::string& field); + void accept(parser_visitor *visitor) override; + + expression& base(); + std::string& field(); + + field_access_expression *is_field_access() override; + + ~field_access_expression() override; + }; + + class dereference_expression : public designator_expression + { + expression *m_base; + + public: + dereference_expression(const struct position position, expression *base); + void accept(parser_visitor *visitor) override; + + expression& base(); + + dereference_expression *is_dereference() override; + + ~dereference_expression() override; + }; + + /** + * Procedure call expression. + */ + class procedure_call : public expression, public statement + { + designator_expression *m_callable; + + public: + std::vector arguments; + + procedure_call(const struct position position, designator_expression *callable); + void accept(parser_visitor *visitor) override; + virtual procedure_call *is_call_expression() override; + + designator_expression& callable(); + + virtual ~procedure_call() override; + }; + + class assign_statement : public statement + { + designator_expression *m_lvalue; + expression *m_rvalue; + + public: + /** + * \param position Source code position. + * \param lvalue Left-hand side. + * \param rvalue Assigned expression. + */ + assign_statement(const struct position position, designator_expression *lvalue, + expression *rvalue); + void accept(parser_visitor *visitor) override; + + designator_expression& lvalue(); + expression& rvalue(); + + virtual ~assign_statement() override; + }; + + /** + * If-statement. + */ + class if_statement : public statement + { + conditional_statements *m_body; + + public: + const std::vector branches; + const std::vector *alternative; + + if_statement(const struct position position, conditional_statements *body, + std::vector&& branches, + std::vector *alternative = nullptr); + void accept(parser_visitor *visitor) override; + + conditional_statements& body(); + + virtual ~if_statement() override; + }; + + /** + * Import statement. + */ + class import_declaration : public node + { + public: + const std::vector segments; + + import_declaration(const struct position position, std::vector&& segments); + void accept(parser_visitor *visitor) override; + }; + + /** + * While-statement. + */ + class while_statement : public statement + { + conditional_statements *m_body; + + public: + const std::vector branches; + + while_statement(const struct position position, conditional_statements *body, + std::vector&& branches); + void accept(parser_visitor *visitor) override; + + conditional_statements& body(); + + virtual ~while_statement() override; + }; + + class unit : public node + { + public: + std::vector imports; + std::vector constants; + std::vector types; + std::vector variables; + std::vector procedures; + + unit(const struct position position); + virtual void accept(parser_visitor *visitor) override; + + virtual ~unit() override; + }; + + class program : public unit + { + public: + std::vector body; + + program(const struct position position); + void accept(parser_visitor *visitor) override; + + virtual ~program() override; + }; + + template + class literal : public literal_expression + { + public: + T value; + + literal(const struct position position, const T& value) + : node(position), value(value) + { + } + + void accept(parser_visitor *visitor) override + { + visitor->visit(this); + } + }; + + class defer_statement : public statement + { + public: + const std::vector statements; + + defer_statement(const struct position position, std::vector&& statements); + void accept(parser_visitor *visitor) override; + + virtual ~defer_statement() override; + }; + + class binary_expression : public expression + { + expression *m_lhs; + expression *m_rhs; + binary_operator m_operator; + + public: + binary_expression(const struct position position, expression *lhs, + expression *rhs, const binary_operator operation); + + void accept(parser_visitor *visitor) override; + binary_expression *is_binary() override; + + expression& lhs(); + expression& rhs(); + binary_operator operation() const; + + virtual ~binary_expression() override; + }; + + class unary_expression : public expression + { + expression *m_operand; + unary_operator m_operator; + + public: + unary_expression(const struct position position, expression *operand, + const unary_operator operation); + + void accept(parser_visitor *visitor) override; + unary_expression *is_unary() override; + + expression& operand(); + unary_operator operation() const; + + virtual ~unary_expression() override; + }; + + const char *print_binary_operator(const binary_operator operation); +} diff --git a/include/elna/frontend/dependency.h b/include/elna/frontend/dependency.h new file mode 100644 index 0000000..f1502d1 --- /dev/null +++ b/include/elna/frontend/dependency.h @@ -0,0 +1,55 @@ +/* Dependency graph analysis. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +#include +#include +#include "elna/frontend/result.h" +#include "elna/frontend/ast.h" +#include "elna/frontend/symbol.h" + +namespace elna::frontend +{ + class dependency : public error_container + { + error_list m_errors; + + public: + std::unique_ptr tree; + forward_table unresolved; + + explicit dependency(const char *path); + }; + + dependency read_source(std::istream& entry_point, const char *entry_path); + std::filesystem::path build_path(const std::vector& segments); + error_list analyze_semantics(const char *path, std::unique_ptr& tree, symbol_bag bag); + + template + struct dependency_state + { + const std::shared_ptr globals; + T custom; + std::unordered_map cache; + + explicit dependency_state(T custom) + : globals(builtin_symbol_table()), custom(custom) + { + } + }; +} diff --git a/include/elna/frontend/driver.h b/include/elna/frontend/driver.h new file mode 100644 index 0000000..66ef579 --- /dev/null +++ b/include/elna/frontend/driver.h @@ -0,0 +1,51 @@ +/* Parsing driver. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +#include +#include "elna/frontend/ast.h" +#include "location.hh" + +namespace elna::frontend +{ + position make_position(const yy::location& location); + + class syntax_error final : public error + { + std::string message; + + public: + syntax_error(const std::string& message, + const char *input_file, const yy::location& location); + + virtual std::string what() const override; + }; + + class driver : public error_container + { + public: + std::unique_ptr tree; + + driver(const char *input_file); + }; + + constexpr char escape_invalid_char = '\xff'; + + char escape_char(char escape); + std::optional escape_string(const char *escape); +} diff --git a/include/elna/frontend/result.h b/include/elna/frontend/result.h new file mode 100644 index 0000000..7e5ed77 --- /dev/null +++ b/include/elna/frontend/result.h @@ -0,0 +1,124 @@ +/* Miscellaneous types used across stage boundaries. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace elna::frontend +{ + /** + * Position in the source text. + */ + struct position + { + /// Line. + std::size_t line = 1; + + /// Column. + std::size_t column = 1; + }; + + /** + * A compilation error consists of an error message and position. + */ + class error + { + protected: + error(const char *path, const struct position position); + + public: + const struct position position; + const char *path; + + virtual ~error() = default; + + /// Error text. + virtual std::string what() const = 0; + + /// Error line in the source text. + std::size_t line() const; + + /// Error column in the source text. + std::size_t column() const; + }; + + using error_list = typename std::deque>; + + class error_container + { + protected: + error_list m_errors; + + error_container(const char *input_file); + + public: + const char *input_file; + + error_list& errors(); + + template + void add_error(Args... arguments) + { + auto new_error = std::make_unique(arguments...); + m_errors.emplace_back(std::move(new_error)); + } + + bool has_errors() const; + }; + + /** + * Tags a procedure type as never returning. + */ + template + struct return_declaration + { + return_declaration() = default; + + explicit return_declaration(T type) + : proper_type(type) + { + } + + explicit return_declaration(std::monostate) + : no_return(true) + { + } + + T proper_type{}; + bool no_return{ false }; + }; + + struct identifier_definition + { + std::string name; + bool exported; + + bool operator==(const identifier_definition& that) const; + bool operator==(const std::string& that) const; + }; +} + +template<> +struct std::hash +{ + std::size_t operator()(const elna::frontend::identifier_definition& key) const noexcept; +}; diff --git a/include/elna/frontend/semantic.h b/include/elna/frontend/semantic.h new file mode 100644 index 0000000..8a295e4 --- /dev/null +++ b/include/elna/frontend/semantic.h @@ -0,0 +1,190 @@ +/* Name analysis. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +#include +#include +#include +#include + +#include "elna/frontend/ast.h" +#include "elna/frontend/result.h" +#include "elna/frontend/symbol.h" + +namespace elna::frontend +{ + class undeclared_error : public error + { + const std::string identifier; + + public: + undeclared_error(const std::string& identifier, const char *path, const struct position position); + + std::string what() const override; + }; + + class already_declared_error : public error + { + const std::string identifier; + + public: + already_declared_error(const std::string& identifier, const char *path, const struct position position); + + std::string what() const override; + }; + + class field_duplication_error : public error + { + const std::string field_name; + + public: + field_duplication_error(const std::string& field_name, const char *path, const struct position position); + + std::string what() const override; + }; + + class cyclic_declaration_error : public error + { + const std::vector cycle; + + public: + cyclic_declaration_error(const std::vector& cycle, + const char *path, const struct position position); + + std::string what() const override; + }; + + class return_error : public error + { + const std::string identifier; + + public: + return_error(const std::string& identifier, const char *path, const struct position position); + + std::string what() const override; + }; + + class variable_initializer_error : public error + { + public: + variable_initializer_error(const char *path, const struct position position); + + std::string what() const override; + }; + + /** + * Checks types. + */ + class type_analysis_visitor final : public empty_visitor, public error_container + { + bool returns; + symbol_bag bag; + + bool check_unresolved_symbol(std::shared_ptr alias, + std::vector& path); + + public: + explicit type_analysis_visitor(const char *path, symbol_bag bag); + + void visit(program *program) override; + + void visit(procedure_declaration *definition) override; + void visit(assign_statement *) override; + void visit(if_statement *) override; + void visit(while_statement *) override; + void visit(return_statement *) override; + void visit(defer_statement *) override; + void visit(case_statement *) override; + void visit(procedure_call *) override; + void visit(unit *unit) override; + void visit(type_declaration *definition) override; + }; + + /** + * Performs name analysis. + */ + class name_analysis_visitor final : public parser_visitor, public error_container + { + type current_type; + constant_info::variant current_literal; + + symbol_bag bag; + + procedure_type build_procedure(procedure_type_expression& type_expression); + std::vector build_composite_type(const std::vector& fields); + + public: + name_analysis_visitor(const char *path, symbol_bag bag); + + void visit(named_type_expression *type_expression) override; + void visit(array_type_expression *type_expression) override; + void visit(pointer_type_expression *type_expression) override; + void visit(program *program) override; + void visit(type_declaration *definition) override; + void visit(record_type_expression *type_expression) override; + void visit(union_type_expression *type_expression) override; + void visit(procedure_type_expression *type_expression) override; + void visit(enumeration_type_expression *type_expression) override; + + void visit(variable_declaration *declaration) override; + void visit(constant_declaration *definition) override; + void visit(procedure_declaration *definition) override; + void visit(assign_statement *statement) override; + void visit(if_statement *statement) override; + void visit(import_declaration *) override; + void visit(while_statement *statement) override; + void visit(return_statement *statement) override; + void visit(defer_statement *statement) override; + void visit(case_statement *statement) override; + void visit(procedure_call *call) override; + void visit(unit *unit) override; + void visit(cast_expression *expression) override; + void visit(traits_expression *trait) override; + void visit(binary_expression *expression) override; + void visit(unary_expression *expression) override; + void visit(variable_expression *) override; + void visit(array_access_expression *expression) override; + void visit(field_access_expression *expression) override; + void visit(dereference_expression *expression) override; + void visit(literal *literal) override; + void visit(literal *literal) override; + void visit(literal *literal) override; + void visit(literal *literal) override; + void visit(literal *literal) override; + void visit(literal *literal) override; + void visit(literal *literal) override; + }; + + /** + * Collects global declarations without resolving any symbols. + */ + class declaration_visitor final : public empty_visitor, public error_container + { + public: + forward_table unresolved; + + explicit declaration_visitor(const char *path); + + void visit(program *program) override; + void visit(import_declaration *) override; + void visit(unit *unit) override; + void visit(type_declaration *definition) override; + void visit(variable_declaration *declaration) override; + void visit(procedure_declaration *definition) override; + }; +} diff --git a/include/elna/frontend/symbol.h b/include/elna/frontend/symbol.h new file mode 100644 index 0000000..ec912ef --- /dev/null +++ b/include/elna/frontend/symbol.h @@ -0,0 +1,457 @@ +/* Symbol definitions. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "elna/frontend/result.h" + +namespace elna::frontend +{ + class alias_type; + class primitive_type; + class record_type; + class union_type; + class pointer_type; + class array_type; + class procedure_type; + class enumeration_type; + + class type + { + enum class type_tag + { + empty, + alias, + primitive, + record, + _union, + pointer, + array, + procedure, + enumeration + }; + type_tag tag{ type_tag::empty }; + union + { + std::weak_ptr alias; + std::shared_ptr primitive; + std::shared_ptr record; + std::shared_ptr _union; + std::shared_ptr pointer; + std::shared_ptr array; + std::shared_ptr procedure; + std::shared_ptr enumeration; + }; + + void copy(const type& other); + void move(type&& other); + + public: + type(); + explicit type(std::shared_ptr alias); + explicit type(std::shared_ptr primitive); + explicit type(std::shared_ptr record); + explicit type(std::shared_ptr _union); + explicit type(std::shared_ptr pointer); + explicit type(std::shared_ptr array); + explicit type(std::shared_ptr procedure); + explicit type(std::shared_ptr enumeration); + + type(const type& other); + type& operator=(const type& other); + + type(type&& other); + type& operator=(type&& other); + + bool operator==(const std::nullptr_t&); + + ~type(); + + template + std::shared_ptr get() const; + + bool empty() const; + }; + + struct alias_type + { + const std::string name; + type reference; + + explicit alias_type(const std::string& name); + }; + + struct pointer_type + { + const type base; + + explicit pointer_type(type base); + }; + + struct array_type + { + const type base; + const std::uint64_t size; + + array_type(type base, std::uint64_t size); + }; + + struct primitive_type + { + const std::string identifier; + + explicit primitive_type(const std::string& identifier); + }; + + using type_field = std::pair; + + struct record_type + { + std::vector fields; + }; + + struct union_type + { + std::vector fields; + }; + + struct procedure_type + { + using return_t = return_declaration; + + std::vector parameters; + const return_t return_type; + + procedure_type(return_t return_type = return_t()); + }; + + struct enumeration_type + { + std::vector members; + + explicit enumeration_type(const std::vector& members); + }; + + class type_info; + class procedure_info; + class constant_info; + class variable_info; + + class info : public std::enable_shared_from_this + { + public: + bool exported{ false }; + + virtual ~info() = 0; + + virtual std::shared_ptr is_type(); + virtual std::shared_ptr is_procedure(); + virtual std::shared_ptr is_constant(); + virtual std::shared_ptr is_variable(); + }; + + /** + * Symbol table. + */ + template + class symbol_map + { + public: + using symbol_ptr = typename std::enable_if< + std::is_convertible::value || std::is_assignable::value, + T + >::type; + using iterator = typename std::unordered_map::iterator; + using const_iterator = typename std::unordered_map::const_iterator; + + private: + std::unordered_map entries; + std::shared_ptr outer_scope; + + public: + /** + * Constructs a new symbol with an optional outer scope. + * + * \param scope Outer scope. + */ + explicit symbol_map(std::shared_ptr scope = nullptr) + : outer_scope(scope) + { + } + + iterator begin() + { + return this->entries.begin(); + } + + iterator end() + { + return this->entries.end(); + } + + const_iterator cbegin() const + { + return this->entries.cbegin(); + } + + const_iterator cend() const + { + return this->entries.cend(); + } + + /** + * \return Symbol count in the current scope. + */ + std::size_t size() const + { + return this->entries.size(); + } + + /** + * Looks for symbol in the table by name. Returns nothing if the symbol + * can not be found. + * + * \param name Symbol name. + * + * \return Symbol from the table if found. + */ + symbol_ptr lookup(const std::string& name) + { + auto entry = entries.find(name); + + if (entry != entries.cend()) + { + return entry->second; + } + if (this->outer_scope != nullptr) + { + return this->outer_scope->lookup(name); + } + return nothing; + } + + /** + * \param name Symbol name. + * + * \return Whether the table contains a symbol with the given name. + */ + bool contains(const std::string& name) + { + return lookup(name) != nothing; + } + + /** + * Registers new symbol. + * + * \param name Symbol name. + * \param entry Symbol information. + * + * \return Whether the insertion took place. + */ + bool enter(const std::string& name, symbol_ptr entry) + { + return lookup(name) == nothing && entries.insert({ name, entry }).second; + } + + /** + * Returns the outer scope or nullptr if the this is the global scope. + * + * \return Outer scope. + */ + std::shared_ptr scope() + { + return this->outer_scope; + } + }; + + using symbol_table = symbol_map, std::nullptr_t, nullptr>; + using forward_table = std::unordered_map>; + + class type_info : public info + { + public: + const type symbol; + + explicit type_info(const type symbol); + std::shared_ptr is_type() override; + }; + + /** + * Procedure symbol information. + */ + class procedure_info : public info + { + public: + /// Procedure type. + const procedure_type symbol; + + /// Parameter names. + const std::vector names; + + /// Local definitions. + std::shared_ptr scope; + + /** + * Constructs procedure symbol information. + * + * \param symbol Procedure type. + * \param names Parameter names. + * \param scope Local definition (is `nullptr` for extern symbols). + */ + procedure_info(const procedure_type symbol, const std::vector names, + std::shared_ptr scope = nullptr); + + std::shared_ptr is_procedure() override; + + /** + * \return Whether this is an extern symbol. + */ + bool is_extern() const; + }; + + class constant_info : public info + { + public: + using variant = typename + std::variant; + + const variant symbol; + + explicit constant_info(const variant& symbol); + std::shared_ptr is_constant() override; + }; + + /** + * Variable symbol information. + */ + class variable_info : public info + { + public: + /// Variable type. + const type symbol; + + /// Whether this is an extern symbol. + const bool is_extern; + + /** + * Constructs a variable symbol information. + * + * \param symbol Variable type. + * \param is_extern Whether this is an extern symbol. + */ + variable_info(const type symbol, bool is_extern); + + std::shared_ptr is_variable() override; + }; + + std::shared_ptr builtin_symbol_table(); + + /** + * Symbol bag contains: + * + * - the symbol table of a module itself + * - symbol tables of imported modules + * - forward declarations + */ + class symbol_bag + { + std::shared_ptr symbols; + std::forward_list> imports; + forward_table unresolved; + + public: + + /** + * \param unresolved Forward declarations collected in the previous step. + * \param global_table Global symbols. + */ + symbol_bag(forward_table&& unresolved, std::shared_ptr global_table); + + /** + * Looks up a symbol in the current and imported modules. + * + * \param name Symbol name to look up. + * + * \return Symbol from one of the symbol tables if found. + */ + std::shared_ptr lookup(const std::string& name); + + /** + * Inserts a symbol into the current scope. + * + * \param name Symbol name. + * \param entry Symbol info. + * + * \return Whether the insertion took place. + */ + bool enter(const std::string& name, std::shared_ptr entry); + + /** + * Enters a new scope. + * + * \return Reference to the allocated scope. + */ + std::shared_ptr enter(); + + /** + * Sets the current scope to \a child. + * + * \param child New scope. + */ + void enter(std::shared_ptr child); + + /** + * Leave the current scope. + * + * \return Left scope. + */ + std::shared_ptr leave(); + + /** + * Checks whether there is a forward declaration \a symbol_name and + * returns it if so. + * + * \param symbol_name Type name to look up. + * \return Forward declaration or `nullptr` if the symbol is not declared. + */ + std::shared_ptr declared(const std::string& symbol_name); + + /** + * Completes the forward-declared type \a symbol_name and defines it to + * be \a resolution. + * + * \param symbol_name Type name. + * \param resolution Type definition. + * \return Alias to the defined type. + */ + std::shared_ptr resolve(const std::string& symbol_name, type& resolution); + + /** + * Add imported symbols to the scope. + * + * \param bag Symbol bag of another module. + */ + void add_import(const symbol_bag& bag); + }; +} diff --git a/include/elna/gcc/elna-builtins.h b/include/elna/gcc/elna-builtins.h new file mode 100644 index 0000000..60baab7 --- /dev/null +++ b/include/elna/gcc/elna-builtins.h @@ -0,0 +1,41 @@ +/* Builtin definitions. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "tree-iterator.h" + +#include "elna/gcc/elna-tree.h" + +namespace elna::gcc +{ + void init_ttree(); + std::shared_ptr builtin_symbol_table(); + + void rewrite_symbol_table(std::shared_ptr info_table, std::shared_ptr symbols); + tree handle_symbol(const std::string& symbol_name, std::shared_ptr reference, + std::shared_ptr symbols); + tree get_inner_alias(const frontend::type& type, std::shared_ptr symbols); + void declare_procedure(const std::string& name, const frontend::procedure_info& info, + std::shared_ptr symbols); + tree declare_variable(const std::string& name, const frontend::variable_info& info, + std::shared_ptr symbols); +} diff --git a/include/elna/gcc/elna-diagnostic.h b/include/elna/gcc/elna-diagnostic.h new file mode 100644 index 0000000..83f768e --- /dev/null +++ b/include/elna/gcc/elna-diagnostic.h @@ -0,0 +1,46 @@ +/* Elna frontend specific diagnostic routines. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "input.h" +#include "tree.h" +#include "diagnostic.h" + +#include +#include + +#include "elna/frontend/result.h" + +namespace elna::gcc +{ + struct linemap_guard + { + explicit linemap_guard(const char *filename); + linemap_guard(const linemap_guard&) = delete; + linemap_guard(linemap_guard&&) = delete; + + ~linemap_guard(); + }; + + location_t get_location(const frontend::position *position); + std::string print_type(tree type); + void report_errors(const std::deque>& errors); +} diff --git a/include/elna/gcc/elna-generic.h b/include/elna/gcc/elna-generic.h new file mode 100644 index 0000000..97cd512 --- /dev/null +++ b/include/elna/gcc/elna-generic.h @@ -0,0 +1,97 @@ +/* Visitor generating a GENERIC tree. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +#include "elna/frontend/ast.h" +#include "elna/frontend/symbol.h" +#include "elna/frontend/semantic.h" +#include "elna/gcc/elna-tree.h" + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "tree-iterator.h" + +#include +#include + +namespace elna::gcc +{ + class generic_visitor final : public frontend::empty_visitor + { + tree current_expression{ NULL_TREE }; + elna::frontend::symbol_bag bag; + std::shared_ptr symbols; + + void enter_scope(); + tree leave_scope(); + + void make_if_branch(frontend::conditional_statements& branch, tree goto_endif); + + tree build_arithmetic_operation(frontend::binary_expression *expression, + tree_code operator_code, tree left, tree right); + tree build_comparison_operation(frontend::binary_expression *expression, + tree_code operator_code, tree left, tree right); + tree build_bit_logic_operation(frontend::binary_expression *expression, tree left, tree right); + tree build_equality_operation(frontend::binary_expression *expression, tree left, tree right); + void build_procedure_call(location_t call_location, + tree procedure_address, const std::vector& arguments); + void build_record_call(location_t call_location, + tree symbol, const std::vector& arguments); + bool build_builtin_procedures(frontend::procedure_call *call); + void build_assert_builtin(location_t call_location, const std::vector& arguments); + + bool expect_trait_type_only(frontend::traits_expression *trait); + bool expect_trait_for_integral_type(frontend::traits_expression *trait); + void visit_statements(const std::vector& statements); + bool assert_constant(location_t expression_location); + + public: + generic_visitor(std::shared_ptr symbol_table, elna::frontend::symbol_bag bag); + + void visit(frontend::program *program) override; + void visit(frontend::procedure_declaration *definition) override; + void visit(frontend::procedure_call *call) override; + void visit(frontend::cast_expression *expression) override; + void visit(frontend::traits_expression *trait) override; + void visit(frontend::literal *literal) override; + void visit(frontend::literal *literal) override; + void visit(frontend::literal *literal) override; + void visit(frontend::literal *boolean) override; + void visit(frontend::literal *character) override; + void visit(frontend::literal *) override; + void visit(frontend::literal *string) override; + void visit(frontend::binary_expression *expression) override; + void visit(frontend::unary_expression *expression) override; + void visit(frontend::constant_declaration *definition) override; + void visit(frontend::variable_declaration *declaration) override; + void visit(frontend::variable_expression *expression) override; + void visit(frontend::array_access_expression *expression) override; + void visit(frontend::field_access_expression *expression) override; + void visit(frontend::dereference_expression *expression) override; + void visit(frontend::unit *unit) override; + void visit(frontend::assign_statement *statement) override; + void visit(frontend::if_statement *statement) override; + void visit(frontend::import_declaration *) override; + void visit(frontend::while_statement *statement) override; + void visit(frontend::return_statement *statement) override; + void visit(frontend::defer_statement *statement) override; + void visit(frontend::case_statement *statement) override; + }; +} diff --git a/include/elna/gcc/elna-tree.h b/include/elna/gcc/elna-tree.h new file mode 100644 index 0000000..48dfeb5 --- /dev/null +++ b/include/elna/gcc/elna-tree.h @@ -0,0 +1,105 @@ +/* Utilities to manipulate GCC trees. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +#include + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "tree-iterator.h" +#include "stringpool.h" +#include "fold-const.h" + +#include "elna/frontend/ast.h" +#include "elna/frontend/symbol.h" +#include "elna/gcc/elna1.h" + +namespace elna::gcc +{ + using symbol_table = frontend::symbol_map; + + bool is_integral_type(tree type); + bool is_numeric_type(tree type); + bool is_unique_type(tree type); + bool is_void_type(tree type); + + /** + * \param type The type to evaluate. + * \return Whether this type can be converted to another type. + */ + bool is_castable_type(tree type); + + /** + * \param lhs Left hand value. + * \param rhs Right hand value. + * \return Whether rhs can be assigned to lhs. + */ + bool are_compatible_pointers(tree lhs_type, tree rhs); + + /** + * Prepares a value to be bound to a variable or parameter. + * + * If rvalue is a procedure declaration, builds a procedure pointer. + * + * \param rvalue Value to be assigned. + * \return Processed value. + */ + tree prepare_rvalue(tree rvalue); + + /** + * \param assignee Assignee. + * \param assignee Assignment. + * \return Whether an expression assignment can be assigned to a variable of type assignee. + */ + bool is_assignable_from(tree assignee, tree assignment); + + void append_statement(tree statement_tree); + void defer(tree statement_tree); + tree chain_defer(); + + tree do_pointer_arithmetic(frontend::binary_operator binary_operator, + tree left, tree right, location_t expression_location); + tree build_binary_operation(bool condition, frontend::binary_expression *expression, + tree_code operator_code, tree left, tree right, tree target_type); + tree build_arithmetic_operation(frontend::binary_expression *expression, + tree_code operator_code, tree left, tree right); + tree build_field(location_t location, tree record_type, const std::string name, tree type); + tree find_field_by_name(location_t expression_location, tree type, const std::string& field_name); + tree build_global_pointer_type(tree type); + tree build_static_array_type(tree type, const std::uint64_t size); + tree build_enumeration_type(const std::vector& members); + tree build_label_decl(const char *name, location_t loc); + + tree extract_constant(tree expression); + + template + tree call_built_in(location_t call_location, const char *name, tree return_type, Args... arguments) + { + tree *builtin = elna_global_decls->get(name); + gcc_assert(builtin != nullptr); + + tree fndecl_type = build_function_type(return_type, TYPE_ARG_TYPES(*builtin)); + tree builtin_addr = build1_loc(call_location, ADDR_EXPR, build_pointer_type(fndecl_type), *builtin); + + tree argument_trees[sizeof...(Args)] = {arguments...}; + + return fold_build_call_array(return_type, builtin_addr, sizeof...(Args), argument_trees); + } +} diff --git a/include/elna/gcc/elna1.h b/include/elna/gcc/elna1.h new file mode 100644 index 0000000..91d0e6f --- /dev/null +++ b/include/elna/gcc/elna1.h @@ -0,0 +1,94 @@ +/* Language-dependent hooks for Elna. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#pragma once + +enum elna_tree_index +{ + ELNA_TI_INT_TYPE, + ELNA_TI_WORD_TYPE, + ELNA_TI_CHAR_TYPE, + ELNA_TI_BOOL_TYPE, + ELNA_TI_POINTER_TYPE, + ELNA_TI_FLOAT_TYPE, + ELNA_TI_STRING_TYPE, + ELNA_TI_BOOL_TRUE, + ELNA_TI_BOOL_FALSE, + ELNA_TI_POINTER_NIL, + ELNA_TI_STRING_PTR_FIELD, + ELNA_TI_STRING_LENGTH_FIELD, + ELNA_TI_MAX +}; + +extern GTY(()) tree elna_global_trees[ELNA_TI_MAX]; +extern GTY(()) hash_map *elna_global_decls; + +#define elna_int_type_node elna_global_trees[ELNA_TI_INT_TYPE] +#define elna_word_type_node elna_global_trees[ELNA_TI_WORD_TYPE] +#define elna_char_type_node elna_global_trees[ELNA_TI_CHAR_TYPE] +#define elna_bool_type_node elna_global_trees[ELNA_TI_BOOL_TYPE] +#define elna_pointer_type_node elna_global_trees[ELNA_TI_POINTER_TYPE] +#define elna_float_type_node elna_global_trees[ELNA_TI_FLOAT_TYPE] +#define elna_string_type_node elna_global_trees[ELNA_TI_STRING_TYPE] +#define elna_bool_true_node elna_global_trees[ELNA_TI_BOOL_TRUE] +#define elna_bool_false_node elna_global_trees[ELNA_TI_BOOL_FALSE] +#define elna_pointer_nil_node elna_global_trees[ELNA_TI_POINTER_NIL] +#define elna_string_ptr_field_node elna_global_trees[ELNA_TI_STRING_PTR_FIELD] +#define elna_string_length_field_node elna_global_trees[ELNA_TI_STRING_LENGTH_FIELD] + +/* Language-dependent contents of a type. */ +struct GTY (()) lang_type +{ +}; + +/* Language-dependent contents of a decl. */ +struct GTY (()) lang_decl +{ +}; + +struct GTY (()) defer_scope +{ + tree defer_block; + tree try_statements; +}; + +struct GTY ((chain_next ("%h.level_chain"))) binding_level +{ + // A block chain is needed to call defer statements beloning to each block. + tree blocks; + + // Parent level. + struct binding_level *level_chain; + + // Statements before the first defer has been seen. + tree statement_list; + + // Defer statement coupled with statements following it. + vec *defers; +}; + +struct GTY (()) language_function +{ + // Local variables and constants. + tree names; + + // Lexical scope. + struct binding_level *binding_level; +}; + +#define f_binding_level DECL_STRUCT_FUNCTION(current_function_decl)->language->binding_level +#define f_names DECL_STRUCT_FUNCTION(current_function_decl)->language->names diff --git a/rakelib/gcc.rake b/rakelib/gcc.rake new file mode 100644 index 0000000..39b4442 --- /dev/null +++ b/rakelib/gcc.rake @@ -0,0 +1,113 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. -} +# frozen_string_literal: true + +require 'uri' +require 'net/http' +require 'open3' +require 'pathname' + +def gcc_verbose(gcc_binary) + read, write = IO.pipe + sh({'LANG' => 'C'}, gcc_binary, '--verbose', err: write) + write.close + output = read.read + read.close + output +end + +def find_build_target + gcc_verbose(ENV.fetch 'CC', 'gcc') + .lines + .find { |line| line.start_with? 'Target: ' } + .split(' ') + .last + .strip +end + +def download_and_pipe(url, target, command) + target.mkpath + + Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http| + request = Net::HTTP::Get.new url.request_uri + + http.request request do |response| + case response + when Net::HTTPRedirection + download_and_pipe URI.parse(response['location']), target, command + when Net::HTTPSuccess + Dir.chdir target.to_path do + Open3.popen2(*command) do |stdin, stdout, wait_thread| + Thread.new do + stdout.each { |line| puts line } + end + + response.read_body do |chunk| + stdin.write chunk + end + stdin.close + + wait_thread.value + end + end + else + response.error! + end + end + end +end + +namespace :gcc do + # Dependencies. + GCC_VERSION = "15.2.0" + HOST_GCC = 'build/host/gcc' + HOST_INSTALL = 'build/host/install' + GCC_PATCH = 'https://raw.githubusercontent.com/Homebrew/formula-patches/575ffcaed6d3112916fed77d271dd3799a7255c4/gcc/gcc-15.1.0.diff' + + directory HOST_GCC + directory HOST_INSTALL + directory 'build/tools' + + desc 'Download and configure the bootstrap compiler' + task configure: ['build/tools', HOST_GCC, HOST_INSTALL] do + url = URI.parse "https://gcc.gnu.org/pub/gcc/releases/gcc-#{GCC_VERSION}/gcc-#{GCC_VERSION}.tar.xz" + build_target = find_build_target + source_directory = Pathname.new "build/tools/gcc-#{GCC_VERSION}" + frontend_link = source_directory + 'gcc' + + download_and_pipe url, source_directory.dirname, ['tar', '-Jxv'] + download_and_pipe URI.parse(GCC_PATCH), source_directory, ['patch', '-p1'] + + sh 'contrib/download_prerequisites', chdir: source_directory.to_path + File.symlink Pathname.new('.').relative_path_from(frontend_link), (frontend_link + 'elna') + + configure_options = [ + "--prefix=#{File.realpath HOST_INSTALL}", + '--enable-languages=c,c++,elna', + '--disable-bootstrap', + '--disable-multilib', + '--with-system-zlib', + "--target=#{build_target}", + "--build=#{build_target}", + "--host=#{build_target}" + ] + if File.symlink? '/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk' + configure_options << '--with-sysroot=/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk' + end + env = ENV.slice 'CC', 'CXX' + env['CFLAGS'] = env['CXXFLAGS'] = '-O0 -g -fPIC -I/opt/homebrew/opt/flex/include' + + configure = source_directory.relative_path_from(HOST_GCC) + 'configure' + sh env, configure.to_path, *configure_options, chdir: HOST_GCC + end + + desc 'Make and install the bootstrap compiler' + task :make do + sh 'make', '-j', Etc.nprocessors.to_s, chdir: HOST_GCC + sh 'make', 'install', chdir: HOST_GCC + end +end + +desc 'Build the bootstrap compiler' +task gcc: %w[gcc:configure gcc:make] diff --git a/rakelib/modula.rake b/rakelib/modula.rake new file mode 100644 index 0000000..746ebb8 --- /dev/null +++ b/rakelib/modula.rake @@ -0,0 +1,36 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. -} +# frozen_string_literal: true + +require 'pathname' +require 'rake/clean' + +CLEAN.include 'build/gcc' + +task source: ['source/main.elna', 'build/gcc/elna'] do |t| + sources, compiler = t.prerequisites.partition { |f| f.end_with? '.elna' } + + sh *compiler, '--parse', *sources +end + +rule(/gcc\/.+\.o$/ => ->(file) { + source = Pathname.new('source') + + Pathname.new(file).relative_path_from('build/gcc').sub_ext('.elna') + + ['build/host/install/bin/gelna', source] +}) do |t| + Pathname.new(t.name).dirname.mkpath + sources, compiler = t.prerequisites.partition { |source| source.end_with? '.elna' } + + sh *compiler, '-c', '-O0', '-g', '-o', t.name, *sources +end + +file 'build/gcc/elna' => FileList['source/**/*.elna'].reject { |file| + file != file.downcase +}.map { |file| + Pathname.new('build/gcc') + + Pathname.new(file).relative_path_from('source').sub_ext('.o') +} do |t| + sh 'build/host/install/bin/gcc', '-o', t.name, *t.prerequisites +end diff --git a/source/CommandLineInterface.def b/source/CommandLineInterface.def new file mode 100644 index 0000000..e4688c4 --- /dev/null +++ b/source/CommandLineInterface.def @@ -0,0 +1,16 @@ +DEFINITION MODULE CommandLineInterface; + +FROM Common IMPORT ShortString; + +TYPE + CommandLine = RECORD + input: ShortString; + output: ShortString; + lex: BOOLEAN; + parse: BOOLEAN + END; + PCommandLine = POINTER TO CommandLine; + +PROCEDURE parse_command_line(): PCommandLine; + +END CommandLineInterface. diff --git a/source/Common.def b/source/Common.def new file mode 100644 index 0000000..9520230 --- /dev/null +++ b/source/Common.def @@ -0,0 +1,12 @@ +DEFINITION MODULE Common; + +TYPE + ShortString = ARRAY[1..256] OF CHAR; + Identifier = ARRAY[1..256] OF CHAR; + PIdentifier = POINTER TO Identifier; + TextLocation = RECORD + line: CARDINAL; + column: CARDINAL + END; + +END Common. diff --git a/source/Lexer.def b/source/Lexer.def new file mode 100644 index 0000000..883c604 --- /dev/null +++ b/source/Lexer.def @@ -0,0 +1,107 @@ +DEFINITION MODULE Lexer; + +FROM FIO IMPORT File; + +FROM Common IMPORT Identifier, ShortString, TextLocation; + +TYPE + PLexerBuffer = POINTER TO CHAR; + BufferPosition = RECORD + iterator: PLexerBuffer; + location: TextLocation + END; + PBufferPosition = POINTER TO BufferPosition; + Lexer = RECORD + input: File; + buffer: PLexerBuffer; + size: CARDINAL; + length: CARDINAL; + start: BufferPosition; + current: BufferPosition + END; + PLexer = POINTER TO Lexer; + LexerKind = ( + lexerKindEof, + lexerKindIdentifier, + lexerKindIf, + lexerKindThen, + lexerKindElse, + lexerKindElsif, + lexerKindWhile, + lexerKindDo, + lexerKindProc, + lexerKindBegin, + lexerKindEnd, + lexerKindXor, + lexerKindConst, + lexerKindVar, + lexerKindCase, + lexerKindOf, + lexerKindType, + lexerKindRecord, + lexerKindUnion, + lexerKindPipe, + lexerKindTo, + lexerKindBoolean, + lexerKindNull, + lexerKindAnd, + lexerKindOr, + lexerKindTilde, + lexerKindReturn, + lexerKindDefer, + lexerKindRange, + lexerKindLeftParen, + lexerKindRightParen, + lexerKindLeftSquare, + lexerKindRightSquare, + lexerKindGreaterEqual, + lexerKindLessEqual, + lexerKindGreaterThan, + lexerKindLessThan, + lexerKindNotEqual, + lexerKindEqual, + lexerKindSemicolon, + lexerKindDot, + lexerKindComma, + lexerKindPlus, + lexerKindMinus, + lexerKindAsterisk, + lexerKindDivision, + lexerKindRemainder, + lexerKindAssignment, + lexerKindColon, + lexerKindHat, + lexerKindAt, + lexerKindComment, + lexerKindInteger, + lexerKindWord, + lexerKindCharacter, + lexerKindString, + lexerKindFrom, + lexerKindPointer, + lexerKindArray, + lexerKindArrow, + lexerKindProgram, + lexerKindModule, + lexerKindImport + ); + LexerToken = RECORD + CASE kind: LexerKind OF + lexerKindBoolean: booleanKind: BOOLEAN | + lexerKindIdentifier: identifierKind: Identifier | + lexerKindInteger: integerKind: INTEGER | + lexerKindString: stringKind: ShortString + END; + start_location: TextLocation; + end_location: TextLocation + END; + PLexerToken = POINTER TO LexerToken; + +PROCEDURE lexer_initialize(lexer: PLexer; input: File); +PROCEDURE lexer_destroy(lexer: PLexer); +(* Returns the last read token. *) +PROCEDURE lexer_current(lexer: PLexer): LexerToken; +(* Read and return the next token. *) +PROCEDURE lexer_lex(lexer: PLexer): LexerToken; + +END Lexer. diff --git a/source/Parser.def b/source/Parser.def new file mode 100644 index 0000000..a766e8e --- /dev/null +++ b/source/Parser.def @@ -0,0 +1,200 @@ +DEFINITION MODULE Parser; + +FROM Common IMPORT Identifier, PIdentifier, ShortString; +FROM Lexer IMPORT PLexer; + +TYPE + Parser = RECORD + lexer: PLexer + END; + PParser = POINTER TO Parser; + + AstLiteralKind = ( + astLiteralKindInteger, + astLiteralKindString, + astLiteralKindNull, + astLiteralKindBoolean + ); + AstLiteral = RECORD + CASE kind: AstLiteralKind OF + astLiteralKindInteger: integer: INTEGER | + astLiteralKindString: string: ShortString | + astLiteralKindNull: | + astLiteralKindBoolean: boolean: BOOLEAN + END + END; + PAstLiteral = POINTER TO AstLiteral; + + AstUnaryOperator = ( + astUnaryOperatorReference, + astUnaryOperatorNot, + astUnaryOperatorMinus + ); + AstBinaryOperator = ( + astBinaryOperatorSum, + astBinaryOperatorSubtraction, + astBinaryOperatorMultiplication, + astBinaryOperatorDivision, + astBinaryOperatorRemainder, + astBinaryOperatorEquals, + astBinaryOperatorNotEquals, + astBinaryOperatorLess, + astBinaryOperatorGreater, + astBinaryOperatorLessEqual, + astBinaryOperatorGreaterEqual, + astBinaryOperatorDisjunction, + astBinaryOperatorConjunction, + astBinaryOperatorExclusiveDisjunction, + astBinaryOperatorShiftLeft, + astBinaryOperatorShiftRight + ); + + AstExpressionKind = ( + astExpressionKindLiteral, + astExpressionKindIdentifier, + astExpressionKindArrayAccess, + astExpressionKindDereference, + astExpressionKindFieldAccess, + astExpressionKindUnary, + astExpressionKindBinary, + astExpressionKindCall + ); + AstExpression = RECORD + CASE kind: AstExpressionKind OF + astExpressionKindLiteral: literal: PAstLiteral | + astExpressionKindIdentifier: identifier: Identifier | + astExpressionKindDereference: reference: PAstExpression | + astExpressionKindArrayAccess: + array: PAstExpression; + index: PAstExpression | + astExpressionKindFieldAccess: + aggregate: PAstExpression; + field: Identifier | + astExpressionKindUnary: + unary_operator: AstUnaryOperator; + unary_operand: PAstExpression | + astExpressionKindBinary: + binary_operator: AstBinaryOperator; + lhs: PAstExpression; + rhs: PAstExpression | + astExpressionKindCall: + callable: PAstExpression; + argument_count: CARDINAL; + arguments: PPAstExpression + END + END; + PAstExpression = POINTER TO AstExpression; + PPAstExpression = POINTER TO PAstExpression; + + AstStatementKind = ( + astStatementKindIf, + astStatementKindWhile, + astStatementKindAssignment, + astStatementKindReturn, + astStatementKindCall + ); + AstStatement = RECORD + CASE kind: AstStatementKind OF + astStatementKindIf: + if_condition: PAstExpression; + if_branch: AstCompoundStatement | + astStatementKindWhile: + while_condition: PAstExpression; + while_body: AstCompoundStatement | + astStatementKindAssignment: + assignee: PAstExpression; + assignment: PAstExpression | + astStatementKindReturn: returned: PAstExpression | + astStatementKindCall: call: PAstExpression + END + END; + PAstStatement = POINTER TO AstStatement; + PPAstStatement = POINTER TO PAstStatement; + AstCompoundStatement = RECORD + count: CARDINAL; + statements: PPAstStatement + END; + + AstImportStatement = RECORD + package: Identifier; + symbols: PIdentifier + END; + PAstImportStatement = POINTER TO AstImportStatement; + PPAstImportStatement = POINTER TO PAstImportStatement; + + AstConstantDeclaration = RECORD + constant_name: Identifier; + constant_value: INTEGER + END; + PAstConstantDeclaration = POINTER TO AstConstantDeclaration; + PPAstConstantDeclaration = POINTER TO PAstConstantDeclaration; + + AstFieldDeclaration = RECORD + field_name: Identifier; + field_type: PAstTypeExpression + END; + PAstFieldDeclaration = POINTER TO AstFieldDeclaration; + + AstTypeExpressionKind = ( + astTypeExpressionKindNamed, + astTypeExpressionKindRecord, + astTypeExpressionKindEnumeration, + astTypeExpressionKindArray, + astTypeExpressionKindPointer, + astTypeExpressionKindProcedure + ); + AstTypeExpression = RECORD + CASE kind: AstTypeExpressionKind OF + astTypeExpressionKindNamed: name: Identifier | + astTypeExpressionKindEnumeration: cases: PIdentifier | + astTypeExpressionKindPointer: target: PAstTypeExpression | + astTypeExpressionKindRecord: fields: PAstFieldDeclaration | + astTypeExpressionKindArray: + base: PAstTypeExpression; + length: CARDINAL | + astTypeExpressionKindProcedure: parameters: PPAstTypeExpression + END + END; + PAstTypeExpression = POINTER TO AstTypeExpression; + PPAstTypeExpression = POINTER TO PAstTypeExpression; + + AstTypedDeclaration = RECORD + identifier: Identifier; + type_expression: PAstTypeExpression + END; + PAstTypedDeclaration = POINTER TO AstTypedDeclaration; + PPAstTypedDeclaration = POINTER TO PAstTypedDeclaration; + + AstVariableDeclaration = RECORD + variable_name: Identifier; + variable_type: PAstTypeExpression + END; + PAstVariableDeclaration = POINTER TO AstVariableDeclaration; + PPAstVariableDeclaration = POINTER TO PAstVariableDeclaration; + + AstProcedureDeclaration = RECORD + name: Identifier; + parameter_count: CARDINAL; + parameters: PAstTypedDeclaration; + return_type: PAstTypeExpression; + constants: PPAstConstantDeclaration; + variables: PPAstVariableDeclaration; + statements: AstCompoundStatement + END; + PAstProcedureDeclaration = POINTER TO AstProcedureDeclaration; + PPAstProcedureDeclaration = POINTER TO PAstProcedureDeclaration; + + AstModule = RECORD + main: BOOLEAN; + imports: PPAstImportStatement; + constants: PPAstConstantDeclaration; + types: PPAstTypedDeclaration; + variables: PPAstVariableDeclaration; + procedures: PPAstProcedureDeclaration; + statements: AstCompoundStatement + END; + PAstModule = POINTER TO AstModule; + +PROCEDURE parse(lexer: PLexer): PAstModule; + +END Parser. diff --git a/source/Parser.elna b/source/Parser.elna new file mode 100644 index 0000000..1225750 --- /dev/null +++ b/source/Parser.elna @@ -0,0 +1,1174 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +import cstdlib, common, Lexer; + +type + Parser = record + lexer: ^Lexer + end; + + AstLiteralKind* = ( + integer, + string, + null, + boolean + ); + AstLiteral* = record + kind: AstLiteralKind; + value: union + integer: Int; + string: String; + boolean: Bool + end + end; + + AstUnaryOperator* = ( + reference, + not, + minus + ); + AstBinaryOperator* = ( + sum, + subtraction, + multiplication, + division, + remainder, + equals, + not_equals, + less, + greater, + less_equal, + greater_equal, + disjunction, + conjunction, + exclusive_disjunction, + shift_left, + shift_right + ); + + AstExpressionKind* = ( + literal, + identifier, + array_access, + dereference, + field_access, + unary, + binary, + call + ); + AstExpression* = record + kind: AstExpressionKind + value: union + literal: ^AstLiteral; + identifier: Identifier; + reference: ^AstExpression; + array_access: record + array: ^AstExpression; + index: ^AstExpression + end; + field_access: record + aggregate: ^AstExpression; + field: Identifier + end; + unary: record + operator: AstUnaryOperator; + operand: ^AstExpression + end; + binary: record + operator: AstBinaryOperator; + lhs: ^AstExpression; + rhs: ^AstExpression + end; + call: record + callable: ^AstExpression; + argument_count: Word; + arguments: ^^AstExpression + end + end + end; + + ConditionalStatement = record + condition: ^AstExpression; + branch: AstCompoundStatement + end; + + AstStatementKind* = ( + if_statement, + while_statement, + assignment_statement, + return_statement, + call_statement + ); + AstStatement* = record + kind: AstStatementKind + value: union + if_statement: ConditionalStatement; + while_statement: ConditionalStatement; + assignment_statement: record + assignee: ^AstExpression; + assignment: ^AstExpression + end; + return_statement: ^AstExpression; + call_statement: ^AstExpression + end + end; + AstCompoundStatement* = record + count: Word; + statements: ^^AstStatement + end; + + AstImportStatement* = record + package: Identifier; + symbols: ^Identifier + end; + + AstConstantDeclaration* = record + constant_name: Identifier; + constant_value: Int + end; + + AstFieldDeclaration* = record + field_name: Identifier; + field_type: ^AstTypeExpression + end; + + AstTypeExpressionKind* = ( + named_expression, + record_expression, + enumeration_expression, + array_expression, + pointer_expression, + procedure_expression + ); + AstTypeExpression* = record + kind: AstTypeExpressionKind; + value: union + name: Identifier; + cases: ^Identifier; + target: ^AstTypeExpression; + fields: ^AstFieldDeclaration; + array_expression: record + base: ^AstTypeExpression; + length: Word + end; + parameters: ^^AstTypeExpression + end + end; + + AstTypedDeclaration* = record + identifier: Identifier; + type_expression: ^AstTypeExpression + end; + + AstVariableDeclaration* = record + variable_name: Identifier; + variable_type: ^AstTypeExpression + end; + + AstProcedureDeclaration* = record + name: Identifier; + parameter_count: Word; + parameters: ^AstTypedDeclaration; + return_type: ^AstTypeExpression; + constants: ^^AstConstantDeclaration; + variables: ^^AstVariableDeclaration; + statements: AstCompoundStatement + end; + + AstModule* = record + main: Bool; + imports: ^^AstImportStatement; + constants: ^^AstConstantDeclaration; + types: ^^AstTypedDeclaration; + variables: ^^AstVariableDeclaration; + procedures: ^^AstProcedureDeclaration; + statements: AstCompoundStatement + end; + +(* Calls lexer_lex() but skips the comments. *) +proc parser_lex(lexer: ^Lexer) -> LexerToken; +var + result: LexerToken; +begin + result := lexer_lex(lexer); + + while result.kind = lexerKindComment do + result := lexer_lex(lexer) + end; + + return result +end; + +proc parse_type_fields(parser: ^Parser) -> ^AstFieldDeclaration; +var + token: LexerToken; + field_declarations: ^AstFieldDeclaration; + field_count: Word; + current_field: ^AstFieldDeclaration; +begin + field_declarations := malloc(#size(AstFieldDeclaration)); + token := parser_lex(parser^.lexer); + field_count := 0; + + while token.kind <> lexerKindEnd do + field_count := field_count + 2u; + + field_declarations := realloc(field_declarations, #size(AstFieldDeclaration) * field_count); + field_count := field_count - 1u; + current_field := field_declarations + (field_count - 1u); + + token := parser_lex(parser^.lexer); + + current_field^.field_name := token.identifierKind; + + token := parser_lex(parser^.lexer); + current_field^.field_type := parse_type_expression(parser); + token := parser_lex(parser^.lexer); + + if token.kind = lexerKindSemicolon then + token := parser_lex(parser^.lexer) + end + end; + current_field := current_field + 1; + memset(current_field, 0, #size(AstFieldDeclaration)); + + return field_declarations +end; + +proc parse_record_type(parser: ^Parser) -> ^AstTypeExpression; +var + result: ^AstTypeExpression; +begin + NEW(result); + result^.kind := AstTypeExpressionKind.record_expression; + result^.fields := parse_type_fields(parser); + + return result +end; + +proc parse_pointer_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; +begin + NEW(result); + result^.kind := AstTypeExpressionKind.pointer_expression; + + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindPointer then + token := parser_lex(parser^.lexer) + end; + token := lexer_current(parser^.lexer); + result^.target := parse_type_expression(parser); + + return result +end; + +proc parse_array_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + buffer: [20]Char; + result: ^AstTypeExpression; +begin + NEW(result); + result^.kind := AstTypeExpressionKind.array_expression; + result^.array_expression.length := 0u; + + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindArray then + token := parser_lex(parser^.lexer) + end; + if token.kind <> lexerKindOf then + token := parser_lex(parser^.lexer); + + result^.array_expression.length := token.integerKind; + + token := parser_lex(parser^.lexer) + end; + token := parser_lex(parser^.lexer); + result^.array_expression.base := parse_type_expression(parser); + + return result +end; + +proc parse_enumeration_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; + current_case: ^Identifier; + case_count: Word; +begin + NEW(result); + result^.kind := AstTypeExpressionKind.enumeration_expression; + + case_count := 1u; + result^.cases := malloc(#size(Identifier) * 2); + token := parser_lex(parser^.lexer); + current_case := result^.cases; + current_case^ := token.identifierKind; + + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindComma do + token := parser_lex(parser^.lexer); + + case_count := case_count + 2u; + + result^.cases := realloc(result^.cases, #size(Identifier) * case_count); + case_count := case_count - 1u; + current_case := result^.cases + (case_count - 1u); + current_case^ := token.identifierKind; + + token := parser_lex(parser^.lexer) + end; + current_case := current_case + 1; + memset(current_case, 0, #size(Identifier)); + + return result +end; + +proc parse_named_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; +begin + token := lexer_current(parser^.lexer); + NEW(result); + + result^.kind := AstTypeExpressionKind.named_expression; + result^.name := token.identifierKind; + + return result +end; + +proc parse_procedure_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; + current_parameter: ^^AstTypeExpression; + parameter_count: Word; +begin + parameter_count := 0u; + NEW(result); + result^.kind := AstTypeExpressionKind.procedure_expression; + + result^.parameters := malloc(#size(^AstTypeExpression)); + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + while token.kind <> lexerKindRightParen do + parameter_count := parameter_count + 2u; + + result^.parameters := realloc(result^.parameters, #size(^AstTypeExpression) * parameter_count); + parameter_count := parameter_count - 1u; + current_parameter := result^.parameters + (parameter_count - 1u); + + current_parameter^ := parse_type_expression(parser); + + token := parser_lex(parser^.lexer); + if token.kind = lexerKindComma then + token := parser_lex(parser^.lexer) + end + end; + current_parameter := result^.parameters + parameter_count; + current_parameter^ := nil; + + return result +end; + +proc parse_type_expression(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; +begin + result := nil; + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindRecord then + result := parse_record_type(parser) + end; + if token.kind = lexerKindLeftParen then + result := parse_enumeration_type(parser) + end; + if (token.kind = lexerKindArray) or (token.kind = lexerKindLeftSquare) then + result := parse_array_type(parser) + end; + if token.kind = lexerKindHat then + result := parse_pointer_type(parser) + end; + if token.kind = lexerKindProc then + result := parse_procedure_type(parser) + end; + if token.kind = lexerKindIdentifier then + result := parse_named_type(parser) + end; + return result +end; + +proc parse_type_declaration(parser: ^Parser) -> ^AstTypedDeclaration; +var + token: LexerToken; + result: ^AstTypedDeclaration; +begin + token := lexer_current(parser^.lexer); + + NEW(result); + result^.identifier := token.identifierKind; + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + result^.type_expression := parse_type_expression(parser); + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse_type_part(parser: ^Parser) -> ^^AstTypedDeclaration; +var + token: LexerToken; + result: ^^AstTypedDeclaration; + current_declaration: ^^AstTypedDeclaration; + declaration_count: Word; +begin + token := lexer_current(parser^.lexer); + + result := malloc(#size(^AstTypedDeclaration)); + current_declaration := result; + declaration_count := 0u; + + if token.kind = lexerKindType then + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindIdentifier do + declaration_count := declaration_count + 1u; + + result := realloc(result, #size(^AstTypedDeclaration) * (declaration_count + 1)); + current_declaration := result + (declaration_count - 1u); + + current_declaration^ := parse_type_declaration(parser); + token := parser_lex(parser^.lexer) + end + end; + if declaration_count <> 0u then + current_declaration := current_declaration + 1 + end; + current_declaration^ := nil; + + return result +end; + +proc parse_variable_declaration(parser: ^Parser) -> ^AstVariableDeclaration; +var + token: LexerToken; + result: ^AstVariableDeclaration; +begin + NEW(result); + + token := lexer_current(parser^.lexer); + result^.variable_name := token.identifierKind; + + token := parser_lex(parser^.lexer); + + token := parser_lex(parser^.lexer); + result^.variable_type := parse_type_expression(parser); + + token := parser_lex(parser^.lexer); + return result +end; + +proc parse_variable_part(parser: ^Parser) -> ^^AstVariableDeclaration; +var + token: LexerToken; + result: ^^AstVariableDeclaration; + current_declaration: ^^AstVariableDeclaration; + declaration_count: Word; +begin + token := lexer_current(parser^.lexer); + + result := malloc(#size(^AstVariableDeclaration)); + current_declaration := result; + declaration_count := 0u; + + if token.kind = lexerKindVar then + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindIdentifier do + declaration_count := declaration_count + 1u; + + result := realloc(result, #size(^AstVariableDeclaration) * (declaration_count + 1)); + current_declaration := result + (declaration_count - 1u); + + current_declaration^ := parse_variable_declaration(parser); + token := parser_lex(parser^.lexer) + end + end; + if declaration_count <> 0 then + current_declaration := current_declaration + 1 + end; + current_declaration^ := nil; + + return result +end; + +proc parse_constant_declaration(parser: ^Parser) -> ^AstConstantDeclaration; +var + token: LexerToken; + result: ^AstConstantDeclaration; +begin + NEW(result); + + token := lexer_current(parser^.lexer); + result^.constant_name := token.identifierKind; + + token := parser_lex(parser^.lexer); + + token := parser_lex(parser^.lexer); + result^.constant_value := token.integerKind; + + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse_constant_part(parser: ^Parser) -> ^^AstConstantDeclaration; +var + token: LexerToken; + result: ^^AstConstantDeclaration; + current_declaration: ^^AstConstantDeclaration; + declaration_count: Word; +begin + token := lexer_current(parser^.lexer); + + result := malloc(#size(^AstConstantDeclaration)); + current_declaration := result; + declaration_count := 0u; + + if token.kind = lexerKindConst then + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindIdentifier do + declaration_count := declaration_count + 1u; + + result := realloc(result, #size(^AstConstantDeclaration) * (declaration_count + 1u)); + current_declaration := result + (declaration_count - 1u); + + current_declaration^ := parse_constant_declaration(parser); + token := parser_lex(parser^.lexer) + end + end; + if declaration_count <> 0 then + current_declaration := current_declaration + 1 + end; + current_declaration^ := nil; + + return result +end; + +proc parse_import_statement(parser: ^Parser) -> ^AstImportStatement; +var + result: ^AstImportStatement; + token: LexerToken; + symbol_count: Word; + current_symbol: ^Identifier; +begin + NEW(result); + symbol_count := 1u; + + token := parser_lex(parser^.lexer); + result^.package := token.identifierKind; + + token := parser_lex(parser^.lexer); + result^.symbols := malloc(#size(Identifier) * 2); + + current_symbol := result^.symbols; + + token := parser_lex(parser^.lexer); + current_symbol^ := token.identifierKind; + + token := parser_lex(parser^.lexer); + while token.kind <> lexerKindSemicolon do + token := parser_lex(parser^.lexer); + symbol_count := symbol_count + 1u; + + result^.symbols := realloc(result^.symbols, #size(Identifier) * (symbol_count + 1u)); + current_symbol := result^.symbols + (symbol_count - 1u); + + current_symbol^ := token.identifierKind; + token := parser_lex(parser^.lexer) + end; + current_symbol := current_symbol + 1; + memset(current_symbol, 0, #size(Identifier)); + + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse_import_part(parser: ^Parser) -> ^^AstImportStatement; +var + token: LexerToken; + import_statement: ^^AstImportStatement; + result: ^^AstImportStatement; + import_count: Word; +begin + token := lexer_current(parser^.lexer); + result := malloc(#size(^AstImportStatement)); + import_statement := result; + import_count := 0u; + + while token.kind = lexerKindFrom do + import_count := import_count + 1u; + + result := realloc(result, #size(^AstImportStatement) * (import_count + 1u)); + import_statement := result + (import_count - 1u); + + import_statement^ := parse_import_statement(parser); + token := lexer_current(parser^.lexer) + end; + if import_count > 0u then + import_statement := import_count + 1 + end; + import_statement^ := nil; + + return result +end; + +proc parse_literal(parser: ^Parser) -> ^AstLiteral; +var + literal: ^AstLiteral; + token: LexerToken; +begin + literal := nil; + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindInteger then + NEW(literal); + + literal^.kind := AstLiteralKind.integer; + literal^.integer := token.integerKind + end; + if (token.kind = lexerKindCharacter) or (token.kind = lexerKindString) then + NEW(literal); + + literal^.kind := AstLiteralKind.string; + literal^.string := token.stringKind + end; + if token.kind = lexerKindNull then + NEW(literal); + + literal^.kind := AstLiteralKind.null + end; + if token.kind = lexerKindBoolean then + NEW(literal); + + literal^.kind := AstLiteralKind.boolean; + literal^.boolean := token.booleanKind + end; + if literal <> nil then + token := parser_lex(parser^.lexer) + end; + + return literal +end; + +proc parse_factor(parser: ^Parser) -> ^AstExpression; +var + next_token: LexerToken; + result: ^AstExpression; + literal: ^AstLiteral; +begin + result := nil; + next_token := lexer_current(parser^.lexer); + + literal := parse_literal(parser); + + if (result = nil) & (literal <> nil) then + NEW(result); + + result^.kind := AstExpressionKind.literal; + result^.literal := literal + end; + if (result = nil) & (next_token.kind = lexerKindMinus) then + NEW(result); + next_token := parser_lex(parser^.lexer); + + result^.kind := AstExpressionKind.unary; + result^.unary.operator := AstUnaryOperator.minus; + result^.unary.operand := parse_factor(parser) + end; + if (result = nil) & (next_token.kind = lexerKindTilde) then + NEW(result); + next_token := parser_lex(parser^.lexer); + + result^.kind := AstExpressionKind.unary; + result^.unary.operator := AstUnaryOperator.not; + result^.unary.operand := parse_factor(parser) + end; + if (result = nil) & (next_token.kind = lexerKindLeftParen) then + next_token := parser_lex(parser^.lexer); + result := parse_expression(parser); + if result <> nil then + next_token := parser_lex(parser^.lexer) + end + end; + if (result = nil) & (next_token.kind = lexerKindIdentifier) then + NEW(result); + + result^.kind := AstExpressionKind.identifier; + result^.identifier := next_token.identifierKind; + + next_token := parser_lex(parser^.lexer) + end; + + return result +end; + +proc parse_designator(parser: ^Parser) -> ^AstExpression; +var + next_token: LexerToken; + inner_expression: ^AstExpression; + designator: ^AstExpression; + arguments: ^^AstExpression; + handled: Bool; +begin + designator := parse_factor(parser); + handled := designator <> nil; + next_token := lexer_current(parser^.lexer); + + while handled do + inner_expression := designator; + handled := false; + + if ~handled & (next_token.kind = lexerKindHat) then + NEW(designator); + + designator^.kind := AstExpressionKind.dereference; + designator^.reference := inner_expression; + + next_token := parser_lex(parser^.lexer); + handled := true + end; + if ~handled & (next_token.kind = lexerKindLeftSquare) then + NEW(designator); + next_token := parser_lex(parser^.lexer); + + designator^.kind := AstExpressionKind.array_access; + designator^.array_access.array := inner_expression; + designator^.array_access.index := parse_expression(parser); + + next_token := parser_lex(parser^.lexer); + handled := true + end; + if ~handled & (next_token.kind = lexerKindDot) then + NEW(designator); + next_token := parser_lex(parser^.lexer); + + designator^.kind := AstExpressionKind.field_access; + designator^.field_access.aggregate := inner_expression; + designator^.field_access.field := next_token.identifierKind; + + next_token := parser_lex(parser^.lexer); + handled := true + end; + if ~handled & (next_token.kind = lexerKindLeftParen) then + NEW(designator); + next_token := parser_lex(parser^.lexer); + + designator^.kind := AstExpressionKind.call; + designator^.call.callable := inner_expression; + designator^.call.argument_count := 0; + designator^.call.arguments := nil; + + if next_token.kind <> lexerKindRightParen then + designator^.arguments := malloc(#size(^AstExpression)); + designator^.argument_count := 1; + designator^.arguments^ := parse_expression(parser); + + next_token := lexer_current(parser^.lexer); + + while next_token.kind = lexerKindComma do + next_token := parser_lex(parser^.lexer); + + designator^.argument_count := designator^.argument_count + 1; + designator^.arguments := realloc(designator^.arguments, #size(^AstExpression) * designator^.argument_count); + arguments := designator^.arguments + (designator^.argument_count - 1u); + arguments^ := parse_expression(parser); + + next_token := lexer_current(parser^.lexer) + end + end; + + next_token := parser_lex(parser^.lexer); + handled := true + end + end; + + return designator +end; + +proc parse_binary_expression(parser: ^Parser, left: ^AstExpression, operator: AstBinaryOperator) -> ^AstExpression; +var + next_token: LexerToken; + result: ^AstExpression; + right: ^AstExpression; +begin + next_token := parser_lex(parser^.lexer); + right := parse_designator(parser); + result := nil; + + if right <> nil then + NEW(result); + result^.kind := AstExpressionKind.binary; + result^.binary.operator := operator; + result^.binary.lhs := left; + result^.binary.rhs := right + end; + + return result +end; + +proc parse_expression(parser: ^Parser) -> ^AstExpression; +var + next_token: LexerToken; + left: ^AstExpression; + result: ^AstExpression; + written_bytes: Word; +begin + left := parse_designator(parser); + result := nil; + next_token := lexer_current(parser^.lexer); + + if left <> nil then + if (result = nil) & (next_token.kind = lexerKindNotEqual) then + result := parse_binary_expression(parser, left, AstBinaryOperator.not_equals) + end; + if (result = nil) & (next_token.kind = lexerKindEqual) then + result := parse_binary_expression(parser, left, AstBinaryOperator.equals) + end; + if (result = nil) & (next_token.kind = lexerKindGreaterThan) then + result := parse_binary_expression(parser, left, AstBinaryOperator.greater) + end; + if (result = nil) & (next_token.kind = lexerKindLessThan) then + result := parse_binary_expression(parser, left, AstBinaryOperator.less) + end; + if (result = nil) & (next_token.kind = lexerKindGreaterEqual) then + result := parse_binary_expression(parser, left, AstBinaryOperator.greater_equal) + end; + if (result = nil) & (next_token.kind = lexerKindLessEqual) then + result := parse_binary_expression(parser, left, AstBinaryOperator.less_equal) + end; + if (result = nil) & (next_token.kind = lexerKindAnd) then + result := parse_binary_expression(parser, left, AstBinaryOperator.conjunction) + end; + if (result = nil) & (next_token.kind = lexerKindOr) then + result := parse_binary_expression(parser, left, AstBinaryOperator.disjunction) + end; + if (result = nil) & (next_token.kind = lexerKindMinus) then + result := parse_binary_expression(parser, left, AstBinaryOperator.subtraction) + end; + if (result = nil) & (next_token.kind = lexerKindPlus) then + result := parse_binary_expression(parser, left, AstBinaryOperator.sum) + end; + if (result = nil) & (next_token.kind = lexerKindAsterisk) then + result := parse_binary_expression(parser, left, AstBinaryOperator.multiplication) + end + end; + if (result = nil) & (left <> nil) then + result := left + end; + + return result +end; + +proc parse_return_statement(parser: ^Parser) -> ^AstStatement; +var + token: LexerToken; + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.return_statement; + + token := parser_lex(parser^.lexer); + result^.return_statement := parse_expression(parser); + + return result +end; + +proc parse_assignment_statement(parser: ^Parser, assignee: ^AstExpression) -> ^AstStatement; +var + token: LexerToken; + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.assignment_statement; + result^.assignment_statement.assignee := assignee; + + token := parser_lex(parser^.lexer); + result^.assignment_statement.assignment := parse_expression(parser); + + return result +end; + +proc parse_call_statement(parser: ^Parser, call: ^AstExpression) -> ^AstStatement; +var + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.call_statement; + result^.call_statement := call; + + return result +end; + +proc parse_compound_statement(parser: ^Parser) -> AstCompoundStatement; +var + result: AstCompoundStatement; + token: LexerToken; + current_statement: ^^AstStatement; + old_count: Word; +begin + result.count := 0u; + result.statements := nil; + + token := lexer_current(parser^.lexer); + + while token.kind <> lexerKindEnd do + old_count := result.count; + result.count := result.count + 1u; + + result.statements := realloc(result.statements, #size(^AstStatement) * result.count); + current_statement := result.statements + old_count; + + current_statement^ := parse_statement(parser); + + token := lexer_current(parser^.lexer) + end; + + return result +end; + +proc parse_statement(parser: ^Parser) -> ^AstStatement; +var + token: LexerToken; + statement: ^AstStatement; + designator: ^AstExpression; +begin + statement := nil; + token := parser_lex(parser^.lexer); + + if token.kind = lexerKindIf then + statement := parse_if_statement(parser) + end; + if token.kind = lexerKindWhile then + statement := parse_while_statement(parser) + end; + if token.kind = lexerKindReturn then + statement := parse_return_statement(parser) + end; + if token.kind = lexerKindIdentifier then + designator := parse_designator(parser); + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindAssignment then + statement := parse_assignment_statement(parser, designator) + end; + if token.kind <> lexerKindAssignment then + statement := parse_call_statement(parser, designator) + end + end; + return statement +end; + +proc parse_if_statement(parser: ^Parser) -> ^AstStatement; +var + token: LexerToken; + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.if_statement; + + token := parser_lex(parser^.lexer); + result^.if_statement.condition := parse_expression(parser); + result^.if_statement.branch := parse_compound_statement(parser); + + token := parser_lex(parser^.lexer); + return result +end; + +proc parse_while_statement(parser: ^Parser) -> ^AstStatement; +var + token: LexerToken; + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.while_statement; + + token := parser_lex(parser^.lexer); + result^.while_statement.condition := parse_expression(parser); + result^.while_statement.body := parse_compound_statement(parser); + + token := parser_lex(parser^.lexer); + return result +end; + +proc parse_statement_part(parser: ^Parser) -> AstCompoundStatement; +var + token: LexerToken; + compound: AstCompoundStatement; +begin + compound.count := 0; + compound.statements := nil; + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindBegin then + compound := parse_compound_statement(parser) + end; + + return compound +end; + +proc parse_procedure_heading(parser: ^Parser) -> ^AstProcedureDeclaration; +var + token: LexerToken; + declaration: ^AstProcedureDeclaration; + parameter_index: Word; + current_parameter: ^AstTypedDeclaration; +begin + NEW(declaration); + + token := parser_lex(parser^.lexer); + declaration^.name := token.identifierKind; + + token := parser_lex(parser^.lexer); + + declaration^.parameters := nil; + declaration^.parameter_count := 0u; + + token := parser_lex(parser^.lexer); + while token.kind <> lexerKindRightParen do + parameter_index := declaration^.parameter_count; + declaration^.parameter_count := declaration^.parameter_count + 1; + declaration^.parameters := realloc(declaration^.parameters, #size(AstTypedDeclaration) * declaration^.parameter_count); + + current_parameter := declaration^.parameters + parameter_index; + + current_parameter^.identifier := token.identifierKind; + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + current_parameter^.type_expression := parse_type_expression(parser); + + token := parser_lex(parser^.lexer); + if token.kind = lexerKindComma then + token := parser_lex(parser^.lexer) + end + end; + token := parser_lex(parser^.lexer); + declaration^.return_type := nil; + + (* Check for the return type and write it. *) + if token.kind = lexerKindArrow then + token := parser_lex(parser^.lexer); + declaration^.return_type := parse_type_expression(parser); + token := parser_lex(parser^.lexer) + end; + token := parser_lex(parser^.lexer); + + return declaration +end; + +proc parse_procedure_declaration(parser: ^Parser) -> ^AstProcedureDeclaration; +var + token: LexerToken; + declaration: ^AstProcedureDeclaration; +begin + declaration := parse_procedure_heading(parser); + + declaration^.constants := parse_constant_part(parser); + declaration^.variables := parse_variable_part(parser); + declaration^.statements := parse_statement_part(parser); + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + return declaration +end; + +proc parse_procedure_part(parser: ^Parser) -> ^^AstProcedureDeclaration; +var + token: LexerToken; + current_declaration: ^^AstProcedureDeclaration; + result: ^^AstProcedureDeclaration; + declaration_count: Word; + declaration_index: Word; +begin + token := lexer_current(parser^.lexer); + declaration_count := 0u; + declaration_index := 0u; + + result := malloc(#size(^AstProcedureDeclaration)); + + while token.kind = lexerKindProc do + declaration_count := declaration_count + 1u; + result := realloc(result, #size(^AstProcedureDeclaration) * (declaration_count + 1)); + current_declaration := result + declaration_index; + + current_declaration^ := parse_procedure_declaration(parser); + token := lexer_current(parser^.lexer); + declaration_index := declaration_count + end; + current_declaration := result + declaration_index; + current_declaration^ := nil; + + return result +end; + +proc parse_module(parser: ^Parser) -> ^AstModule; +var + token: LexerToken; + result: ^AstModule; +begin + NEW(result); + token := parser_lex(parser^.lexer); + result^.main := true; + + if token.kind = lexerKindModule then + result^.main := false + end; + token := parser_lex(parser^.lexer); + + (* Write the module body. *) + token := parser_lex(parser^.lexer); + + result^.imports := parse_import_part(parser); + result^.constants := parse_constant_part(parser); + result^.types := parse_type_part(parser); + + result^.variables := parse_variable_part(parser); + result^.procedures := parse_procedure_part(parser); + result^.statements := parse_statement_part(parser); + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse*(lexer: ^Lexer) -> ^AstModule; +var + parser: Parser; +begin + parser.lexer := lexer; + + return parse_module(@parser) +end; + +end. diff --git a/source/Transpiler.def b/source/Transpiler.def new file mode 100644 index 0000000..5f8c219 --- /dev/null +++ b/source/Transpiler.def @@ -0,0 +1,20 @@ +DEFINITION MODULE Transpiler; + +FROM FIO IMPORT File; + +FROM Common IMPORT ShortString; +FROM Lexer IMPORT PLexer, Lexer; +FROM Parser IMPORT PAstModule; + +TYPE + TranspilerContext = RECORD + input_name: ShortString; + output: File; + definition: File; + indentation: CARDINAL + END; + PTranspilerContext = POINTER TO TranspilerContext; + +PROCEDURE transpile(ast_module: PAstModule; output: File; definition: File; input_name: ShortString); + +END Transpiler. diff --git a/source/Transpiler.elna b/source/Transpiler.elna new file mode 100644 index 0000000..5a65036 --- /dev/null +++ b/source/Transpiler.elna @@ -0,0 +1,631 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +from FIO import File, WriteNBytes, WriteLine, WriteChar, WriteString; +from NumberIO import IntToStr; + +import common, Parser; + +type + TranspilerContext* = record + input_name: String; + output: File; + definition: File; + indentation: Word + end; + +proc indent(context: ^TranspilerContext); +var + count: Word; +begin + count := 0; + + while count < context^.indentation do + WriteString(context^.output, " "); + count := count + 1u + end +end; + +(* Write a semicolon followed by a newline. *) +proc write_semicolon(output: File); +begin + WriteChar(output, ';'); + WriteLine(output) +end; + +proc transpile_import_statement(context: ^TranspilerContext, import_statement: ^AstImportStatement); +var + current_symbol: ^Identifier; +begin + WriteString(context^.output, "FROM "); + transpile_identifier(context, import_statement^.package); + + WriteString(context^.output, " IMPORT "); + + current_symbol := import_statement^.symbols; + transpile_identifier(context, current_symbol^); + current_symbol := current_symbol + 1; + + while current_symbol^[1] <> '\0' do + WriteString(context^.output, ", "); + transpile_identifier(context, current_symbol^); + current_symbol := current_symbol + 1; + end; + write_semicolon(context^.output) +end; + +proc transpile_import_part(context: ^TranspilerContext, imports: ^^AstImportStatement); +var + import_statement: ^AstImportStatement; +begin + while imports^ <> nil do + transpile_import_statement(context, imports^); + imports := imports + 1 + end; + WriteLine(context^.output) +end; + +proc transpile_constant_declaration(context: ^TranspilerContext, declaration: ^AstConstantDeclaration); +var + buffer: [20]Char; +begin + WriteString(context^.output, " "); + transpile_identifier(context, declaration^.constant_name); + + WriteString(context^.output, " = "); + + IntToStr(declaration^.constant_value, 0, buffer); + WriteString(context^.output, buffer); + + write_semicolon(context^.output) +end; + +proc transpile_constant_part(context: ^TranspilerContext, declarations: ^^AstConstantDeclaration, extra_newline: Bool); +var + current_declaration: ^^AstConstantDeclaration; +begin + if declarations^ <> nil then + WriteString(context^.output, "CONST"); + WriteLine(context^.output); + + current_declaration := declarations; + while current_declaration^ <> nil do + transpile_constant_declaration(context, current_declaration^); + + current_declaration := current_declaration + 1 + end; + if extra_newline then + WriteLine(context^.output) + end + end +end; + +proc transpile_module(context: ^TranspilerContext, result: ^AstModule); +begin + if result^.main = false then + WriteString(context^.output, "IMPLEMENTATION ") + end; + WriteString(context^.output, "MODULE "); + + (* Write the module name and end the line with a semicolon and newline. *) + transpile_module_name(context); + + write_semicolon(context^.output); + WriteLine(context^.output); + + (* Write the module body. *) + + transpile_import_part(context, result^.imports); + transpile_constant_part(context, result^.constants, true); + transpile_type_part(context, result^.types); + transpile_variable_part(context, result^.variables, true); + transpile_procedure_part(context, result^.procedures); + transpile_statement_part(context, result^.statements); + + WriteString(context^.output, "END "); + transpile_module_name(context); + + WriteChar(context^.output, "."); + WriteLine(context^.output) +end; + +proc transpile_type_fields(context: ^TranspilerContext, fields: ^AstFieldDeclaration); +var + current_field: ^AstFieldDeclaration; +begin + current_field := fields; + + while current_field^.field_name[1] <> '\0' do + WriteString(context^.output, " "); + transpile_identifier(context, current_field^.field_name); + + WriteString(context^.output, ": "); + transpile_type_expression(context, current_field^.field_type); + + current_field := current_field + 1; + + if current_field^.field_name[1] <> '\0' then + WriteChar(context^.output, ';') + end; + WriteLine(context^.output) + end +end; + +proc transpile_record_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +begin + WriteString(context^.output, "RECORD"); + WriteLine(context^.output); + transpile_type_fields(context, type_expression^.fields); + WriteString(context^.output, " END") +end; + +proc transpile_pointer_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +begin + WriteString(context^.output, "POINTER TO "); + + transpile_type_expression(context, type_expression^.target) +end; + +proc transpile_array_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +var + buffer: [20]Char; +begin + WriteString(context^.output, "ARRAY"); + + if type_expression^.length <> 0 then + WriteString(context^.output, "[1.."); + + IntToStr(type_expression^.length, 0, buffer); + WriteString(context^.output, buffer); + + WriteChar(context^.output, ']') + end; + WriteString(context^.output, " OF "); + + transpile_type_expression(context, type_expression^.base) +end; + +proc transpile_enumeration_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +var + current_case: ^Identifier; +begin + current_case := type_expression^.cases; + + WriteString(context^.output, "("); + WriteLine(context^.output); + WriteString(context^.output, " "); + transpile_identifier(context, current_case^); + current_case := current_case + 1; + + while current_case^[1] <> '\0' do + WriteChar(context^.output, ','); + WriteLine(context^.output); + WriteString(context^.output, " "); + transpile_identifier(context, current_case^); + + current_case := current_case + 1 + end; + WriteLine(context^.output); + WriteString(context^.output, " )") +end; + +proc transpile_identifier(context: ^TranspilerContext, identifier: Identifier); +var + written_bytes: Word; +begin + written_bytes := WriteNBytes(context^.output, cast(identifier[1]: Word), @identifier[2]) +end; + +proc transpile_procedure_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +var + result: ^AstTypeExpression; + current_parameter: ^^AstTypeExpression; + parameter_count: Word; +begin + WriteString(context^.output, "PROCEDURE("); + current_parameter := type_expression^.parameters; + + while current_parameter^ <> nil do + transpile_type_expression(context, current_parameter^); + + current_parameter := current_parameter + 1; + + if current_parameter^ <> nil then + WriteString(context^.output, ", ") + end + end; + WriteChar(context^.output, ')') +end; + +proc transpile_type_expression(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +begin + if type_expression^.kind = astTypeExpressionKindRecord then + transpile_record_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindEnumeration then + transpile_enumeration_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindArray then + transpile_array_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindPointer then + transpile_pointer_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindProcedure then + transpile_procedure_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindNamed then + transpile_identifier(context, type_expression^.name) + end +end; + +proc transpile_type_declaration(context: ^TranspilerContext, declaration: ^AstTypedDeclaration); +var + written_bytes: Word; +begin + WriteString(context^.output, " "); + + transpile_identifier(context^.output, declaration^.identifier); + WriteString(context^.output, " = "); + + transpile_type_expression(context, declaration^.type_expression); + write_semicolon(context^.output) +end; + +proc transpile_type_part(context: ^TranspilerContext, declarations: ^^AstTypedDeclaration); +var + current_declaration: ^^AstTypedDeclaration; +begin + if declarations^ <> nil then + WriteString(context^.output, "TYPE"); + WriteLine(context^.output); + + current_declaration := declarations; + while current_declaration^ <> nil do + transpile_type_declaration(context, current_declaration^); + + current_declaration := current_declaration + 1 + end; + WriteLine(context^.output) + end +end; + +proc transpile_variable_declaration(context: ^TranspilerContext, declaration: ^AstVariableDeclaration); +begin + WriteString(context^.output, " "); + transpile_identifier(context, declaration^.variable_name); + + WriteString(context^.output, ": "); + + transpile_type_expression(context, declaration^.variable_type); + write_semicolon(context^.output) +end; + +proc transpile_variable_part(context: ^TranspilerContext, declarations: ^^AstVariableDeclaration, extra_newline: Bool); +var + current_declaration: ^^AstVariableDeclaration; +begin + if declarations^ <> nil then + WriteString(context^.output, "VAR"); + WriteLine(context^.output); + + current_declaration := declarations; + while current_declaration^ <> nil do + transpile_variable_declaration(context, current_declaration^); + + current_declaration := current_declaration + 1 + end; + if extra_newline then + WriteLine(context^.output) + end + end +end; + +proc transpile_procedure_heading(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration); +var + parameter_index: Word; + current_parameter: ^AstTypedDeclaration; +begin + WriteString(context^.output, "PROCEDURE "); + transpile_identifier(context, declaration^.name); + WriteChar(context^.output, '('); + + parameter_index := 0; + current_parameter := declaration^.parameters; + + while parameter_index < declaration^.parameter_count do + transpile_identifier(context, current_parameter^.identifier); + WriteString(context^.output, ": "); + transpile_type_expression(context, current_parameter^.type_expression); + + parameter_index := parameter_index + 1u; + current_parameter := current_parameter + 1; + + if parameter_index <> declaration^.parameter_count then + WriteString(context^.output, "; ") + end + end; + + WriteString(context^.output, ")"); + + (* Check for the return type and write it. *) + if declaration^.return_type <> nil then + WriteString(context^.output, ": "); + transpile_type_expression(context, declaration^.return_type) + end; + write_semicolon(context^.output) +end; + +proc transpile_unary_operator(context: ^TranspilerContext, operator: AstUnaryOperator); +begin + if operator = AstUnaryOperator.minus then + WriteChar(context^.output, '-') + end; + if operator = AstUnaryOperator.not then + WriteChar(context^.output, '~') + end +end; + +proc transpile_binary_operator(context: ^TranspilerContext, operator: AstBinaryOperator); +begin + case operator of + AstBinaryOperator.sum: WriteChar(context^.output, '+') + | AstBinaryOperator.subtraction: WriteChar(context^.output, '-') + | AstBinaryOperator.multiplication: WriteChar(context^.output, '*') + | AstBinaryOperator.equals: WriteChar(context^.output, '=') + | AstBinaryOperator.not_equals: WriteChar(context^.output, '#') + | AstBinaryOperator.less: WriteChar(context^.output, '<') + | AstBinaryOperator.greater: WriteChar(context^.output, '>') + | AstBinaryOperator.less_equal: WriteString(context^.output, "<=") + | AstBinaryOperator.greater_equal: WriteString(context^.output, ">=") + | AstBinaryOperator.disjunction: WriteString(context^.output, "OR") + | AstBinaryOperatorConjunction: WriteString(context^.output, "AND") + end +end; + +proc transpile_expression(context: ^TranspilerContext, expression: ^AstExpression); +var + literal: ^AstLiteral; + buffer: [20]Char; + argument_index: Word; + current_argument: ^^AstExpression; +begin + if expression^.kind = astExpressionKindLiteral then + literal := expression^.literal; + + if literal^.kind = AstLiteralKind.integer then + IntToStr(literal^.integer, 0, buffer); + WriteString(context^.output, buffer) + end; + if literal^.kind = AstLiteralKind.string then + WriteString(context^.output, literal^.string) + end; + if literal^.kind = AstLiteralKind.null then + WriteString(context^.output, "NIL") + end; + if (literal^.kind = AstLiteralKind.boolean) & literal^.boolean then + WriteString(context^.output, "TRUE") + end; + if (literal^.kind = AstLiteralKind.boolean) & (literal^.boolean = false) then + WriteString(context^.output, "FALSE") + end + end; + if expression^.kind = astExpressionKindIdentifier then + transpile_identifier(context, expression^.identifier) + end; + if expression^.kind = astExpressionKindDereference then + transpile_expression(context, expression^.reference); + WriteChar(context^.output, '^') + end; + if expression^.kind = astExpressionKindArrayAccess then + transpile_expression(context, expression^.array); + WriteChar(context^.output, '['); + transpile_expression(context, expression^.index); + WriteChar(context^.output, ']') + end; + if expression^.kind = astExpressionKindFieldAccess then + transpile_expression(context, expression^.aggregate); + WriteChar(context^.output, '.'); + transpile_identifier(contextexpression^.field) + end; + if expression^.kind = astExpressionKindUnary then + transpile_unary_operator(context, expression^.unary_operator); + transpile_expression(context, expression^.unary_operand) + end; + if expression^.kind = astExpressionKindBinary then + WriteChar(context^.output, '('); + transpile_expression(context, expression^.lhs); + WriteChar(context^.output, ' '); + transpile_binary_operator(context, expression^.binary_operator); + WriteChar(context^.output, ' '); + transpile_expression(context, expression^.rhs); + WriteChar(context^.output, ')') + end; + if expression^.kind = astExpressionKindCall then + transpile_expression(context, expression^.callable); + WriteChar(context^.output, '('); + + current_argument := expression^.arguments; + if expression^.argument_count > 0 then + transpile_expression(context, current_argument^); + + argument_index := 1u; + current_argument := current_argument + 1; + + while argument_index < expression^.argument_count do + WriteString(context^.output, ", "); + + transpile_expression(context, current_argument^); + + current_argument := current_argument + 1; + argument_index := argument_index + 1u + end + end; + WriteChar(context^.output, ')') + end +end; + +proc transpile_if_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + WriteString(context^.output, "IF "); + transpile_expression(context, statement^.if_condition); + + WriteString(context^.output, " THEN"); + WriteLine(context^.output); + context^.indentation := context^.indentation + 1u; + + transpile_compound_statement(context, statement^.if_branch); + context^.indentation := context^.indentation - 1u; + indent(context); + WriteString(context^.output, "END") +end; + +proc transpile_while_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + WriteString(context^.output, "WHILE "); + transpile_expression(context, statement^.while_condition); + + WriteString(context^.output, " DO"); + WriteLine(context^.output); + context^.indentation := context^.indentation + 1u; + + transpile_compound_statement(context, statement^.while_body); + context^.indentation := context^.indentation - 1u; + indent(context); + WriteString(context^.output, "END") +end; + +proc transpile_assignment_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + transpile_expression(context, statement^.assignee); + WriteString(context^.output, " := "); + transpile_expression(context, statement^.assignment) +end; + +proc transpile_return_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + WriteString(context^.output, "RETURN "); + + transpile_expression(context, statement^.returned) +end; + +proc transpile_compound_statement(context: ^TranspilerContext, statement: AstCompoundStatement); +var + current_statement: ^^AstStatement; + index: Word; +begin + index := 0; + current_statement := statement.statements; + + while index < statement.count do + transpile_statement(context, current_statement^); + + current_statement := current_statement + 1; + index := index + 1u; + + if index <> statement.count then + WriteChar(context^.output, ';') + end; + WriteLine(context^.output) + end +end; + +proc transpile_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + indent(context); + + if statement^.kind = astStatementKindIf then + transpile_if_statement(context, statement) + end; + if statement^.kind = astStatementKindWhile then + transpile_while_statement(context, statement) + end; + if statement^.kind = astStatementKindReturn then + transpile_return_statement(context, statement) + end; + if statement^.kind = astStatementKindAssignment then + transpile_assignment_statement(context, statement) + end; + if statement^.kind = astStatementKindCall then + transpile_expression(context, statement^.call) + end +end; + +proc transpile_statement_part(context: ^TranspilerContext, compound: AstCompoundStatement); +begin + if compound.count > 0 then + WriteString(context^.output, "BEGIN"); + WriteLine(context^.output); + + context^.indentation := context^.indentation + 1u; + transpile_compound_statement(context, compound); + context^.indentation := context^.indentation - 1u; + end +end; + +proc transpile_procedure_declaration(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration); +begin + transpile_procedure_heading(context, declaration); + + transpile_constant_part(context, declaration^.constants, false); + transpile_variable_part(context, declaration^.variables, false); + transpile_statement_part(context, declaration^.statements); + + WriteString(context^.output, "END "); + transpile_identifier(context^.output, declaration^.name); + + write_semicolon(context^.output) +end; + +proc transpile_procedure_part(context: ^TranspilerContext, declaration: ^^AstProcedureDeclaration); +begin + while declaration^ <> nil do + transpile_procedure_declaration(context, declaration^); + WriteLine(context^.output); + + declaration := declaration + 1 + end +end; + +proc transpile_module_name(context: ^TranspilerContext); +var + counter: Word; + last_slash: Word; +begin + counter := 1u; + last_slash := 0u; + + while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do + if context^.input_name[counter] = '/' then + last_slash := counter + end; + counter := counter + 1u + end; + + if last_slash = 0u then + counter := 1u + end; + if last_slash <> 0u then + counter := last_slash + 1u + end; + while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do + WriteChar(context^.output, context^.input_name[counter]); + counter := counter + 1u + end +end; + +proc transpile*(ast_module: ^AstModule, output: File, definition: File, input_name: String); +var + context: TranspilerContext; +begin + context.input_name := input_name; + context.output := output; + context.definition := definition; + context.indentation := 0u; + + transpile_module(@context, ast_module) +end; + +end. diff --git a/source/cctype.elna b/source/cctype.elna new file mode 100644 index 0000000..3906cd1 --- /dev/null +++ b/source/cctype.elna @@ -0,0 +1,14 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +proc isdigit*(c: Int ) -> Int; extern; +proc isalnum*(c: Int) -> Int; extern; +proc isalpha*(c: Int) -> Int; extern; +proc isspace*(c: Int) -> Int; extern; + +proc tolower*(c: Int) -> Int; extern; +proc toupper*(c: Int) -> Int; extern; + +end. diff --git a/source/command_line_interface.elna b/source/command_line_interface.elna new file mode 100644 index 0000000..040fdeb --- /dev/null +++ b/source/command_line_interface.elna @@ -0,0 +1,93 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) + +(* + Command line handling. +*) +module; + +import cstdlib, cstring, common; + +type + CommandLine* = record + input: ^Char; + output: ^Char; + lex: Bool; + parse: Bool + end; + +proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine; +var + parameter: ^Char; + i: Int; + result: ^CommandLine; + parsed: Bool; +begin + i := 1; + result := cast(malloc(#size(CommandLine)): ^CommandLine); + result^.lex := false; + result^.parse := false; + result^.input := nil; + result^.output := nil; + + while i < argc & result <> nil do + parameter := (argv + i)^; + parsed := false; + + if strcmp(parameter, "--lex\0".ptr) = 0 then + parsed := true; + result^.lex := true + end; + if strcmp(parameter, "--parse\0".ptr) = 0 then + parsed := true; + result^.parse := true + end; + if strcmp(parameter, "-o\0".ptr) = 0 then + i := i + 1; + + if i = argc then + write_s("Fatal error: expecting a file name following -o."); + result := nil + end; + if i < argc then + parameter := (argv + i)^; + result^.output := parameter + end; + parsed := true + end; + if (parameter^ <> '-') & ~parsed then + parsed := true; + + if result^.input <> nil then + write_s("Fatal error: only one source file can be compiled at once. First given \""); + write_z(result^.input); + write_s("\", then \""); + write_z(parameter); + write_s("\".\n"); + result := nil + end; + if result <> nil then + result^.input := parameter + end + end; + if ~parsed then + write_s("Fatal error: unknown command line options: "); + + write_z(parameter); + write_s(".\n"); + + result := nil + end; + + i := i + 1 + end; + if result <> nil & result^.input = nil then + write_s("Fatal error: no input files.\n"); + result := nil + end; + + return result +end; + +end. diff --git a/source/common.elna b/source/common.elna new file mode 100644 index 0000000..e7b30ca --- /dev/null +++ b/source/common.elna @@ -0,0 +1,72 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +import cstring, cstdio; + +type + Identifier = [256]Char; + TextLocation* = record + line: Word; + column: Word + end; + +proc write*(fd: Int, buf: Pointer, Word: Int) -> Int; extern; + +proc write_s*(value: String); +begin + (* fwrite(cast(value.ptr: Pointer), value.length, 1u, stdout) *) + write(1, cast(value.ptr: Pointer), cast(value.length: Int)) +end; + +proc write_z*(value: ^Char); +begin + write(1, cast(value: Pointer), cast(strlen(value): Int)) +end; + +proc write_b*(value: Bool); +begin + if value then + write_s("true") + else + write_s("false") + end +end; + +proc write_c*(value: Char); +begin + putchar(cast(value: Int)); + fflush(nil) +end; + +proc write_i*(value: Int); +var + digit: Int; + n: Word; + buffer: [10]Char; +begin + n := 10u; + + if value = 0 then + write_c('0') + end; + while value <> 0 do + digit := value % 10; + value := value / 10; + + buffer[n] := cast(cast('0': Int) + digit: Char); + n := n - 1u + end; + while n < 10u do + n := n + 1u; + write_c(buffer[n]) + end +end; + +proc write_u*(value: Word); +begin + write_i(cast(value: Int)) +end; + +end. diff --git a/source/cstdio.elna b/source/cstdio.elna new file mode 100644 index 0000000..c7507ff --- /dev/null +++ b/source/cstdio.elna @@ -0,0 +1,29 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +type + FILE* = record end; + +var + stdin*: ^FILE := extern; + stdout*: ^FILE := extern; + stderr*: ^FILE := extern; + +proc fopen*(pathname: ^Char, mode: ^Char) -> ^FILE; extern; +proc fclose*(stream: ^FILE) -> Int; extern; +proc fseek*(stream: ^FILE, off: Int, whence: Int) -> Int; extern; +proc rewind*(stream: ^FILE); extern; +proc ftell*(stream: ^FILE) -> Int; extern; +proc fflush*(stream: ^FILE) -> Int; extern; + +proc fread*(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern; +proc fwrite*(ptr: Pointer, size: Word, nitems: Word, stream: ^FILE) -> Word; extern; + +proc perror(s: ^Char); extern; + +proc puts(s: ^Char) -> Int; extern; +proc putchar(c: Int) -> Int; extern; + +end. diff --git a/source/cstdlib.elna b/source/cstdlib.elna new file mode 100644 index 0000000..da2029c --- /dev/null +++ b/source/cstdlib.elna @@ -0,0 +1,15 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +proc malloc(size: Word) -> Pointer; extern; +proc free(ptr: Pointer); extern; +proc calloc(nmemb: Word, size: Word) -> Pointer; extern; +proc realloc(ptr: Pointer, size: Word) -> Pointer; extern; + +proc atoi(str: ^Char) -> Int; extern; + +proc exit(code: Int) -> !; extern; + +end. diff --git a/source/cstring.elna b/source/cstring.elna new file mode 100644 index 0000000..24d852a --- /dev/null +++ b/source/cstring.elna @@ -0,0 +1,15 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +proc memset(ptr: Pointer, c: Int, n: Word) -> ^Char; extern; +proc memcpy(dst: Pointer, src: Pointer, n: Word); extern; + +proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern; +proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern; +proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern; +proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern; +proc strlen(ptr: ^Char) -> Word; extern; + +end. diff --git a/source/lexer.elna b/source/lexer.elna new file mode 100644 index 0000000..d5f529b --- /dev/null +++ b/source/lexer.elna @@ -0,0 +1,952 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +import cstdio, cstring, cctype, cstdlib, common; + +const + CHUNK_SIZE := 85536u; + +type + (* + * Classification table assigns each possible character to a group (class). All + * characters of the same group are handled equivalently. + * + * Classification: + *) + TransitionClass = ( + invalid, + digit, + alpha, + space, + colon, + equals, + left_paren, + right_paren, + asterisk, + underscore, + single, + hex, + zero, + x, + eof, + dot, + minus, + single_quote, + double_quote, + greater, + less, + other + ); + TransitionState = ( + start, + colon, + identifier, + decimal, + greater, + minus, + left_paren, + less, + dot, + comment, + closing_comment, + character, + string, + leading_zero, + decimal_suffix, + finish + ); + LexerToken = record + kind: LexerKind; + value: union + booleanKind: Bool; + identifierKind: Identifier; + integerKind: Int; + stringKind: String + end; + start_location: TextLocation; + end_location: TextLocation + end; + TransitionAction = proc(^Lexer, ^LexerToken); + Transition = record + action: TransitionAction; + next_state: TransitionState + end; + TransitionClasses = [22]Transition; + + BufferPosition* = record + iterator: ^Char; + location: TextLocation + end; + Lexer* = record + input: ^FILE; + buffer: ^Char; + size: Word; + length: Word; + start: BufferPosition; + current: BufferPosition + end; + LexerKind* = ( + unknown, + identifier, + _if, + _then, + _else, + _elsif, + _while, + _do, + _proc, + _begin, + _end, + _extern, + _const, + _var, + _case, + _of, + _type, + _record, + _union, + pipe, + to, + boolean, + null, + and, + _or, + _xor, + not, + _return, + _cast, + shift_left, + shift_right, + left_paren, + right_paren, + left_square, + right_square, + greater_equal, + less_equal, + greater_than, + less_than, + not_equal, + equal, + semicolon, + dot, + comma, + plus, + minus, + multiplication, + division, + remainder, + assignment, + colon, + hat, + at, + comment, + integer, + word, + character, + string, + _defer, + exclamation, + arrow, + trait, + _program, + _module, + _import + ); + +var + classification: [128]TransitionClass; + transitions: [16]TransitionClasses; + +proc initialize_classification(); +var + i: Word; +begin + classification[1] := TransitionClass.eof; (* NUL *) + classification[2] := TransitionClass.invalid; (* SOH *) + classification[3] := TransitionClass.invalid; (* STX *) + classification[4] := TransitionClass.invalid; (* ETX *) + classification[5] := TransitionClass.invalid; (* EOT *) + classification[6] := TransitionClass.invalid; (* EMQ *) + classification[7] := TransitionClass.invalid; (* ACK *) + classification[8] := TransitionClass.invalid; (* BEL *) + classification[9] := TransitionClass.invalid; (* BS *) + classification[10] := TransitionClass.space; (* HT *) + classification[11] := TransitionClass.space; (* LF *) + classification[12] := TransitionClass.invalid; (* VT *) + classification[13] := TransitionClass.invalid; (* FF *) + classification[14] := TransitionClass.space; (* CR *) + classification[15] := TransitionClass.invalid; (* SO *) + classification[16] := TransitionClass.invalid; (* SI *) + classification[17] := TransitionClass.invalid; (* DLE *) + classification[18] := TransitionClass.invalid; (* DC1 *) + classification[19] := TransitionClass.invalid; (* DC2 *) + classification[20] := TransitionClass.invalid; (* DC3 *) + classification[21] := TransitionClass.invalid; (* DC4 *) + classification[22] := TransitionClass.invalid; (* NAK *) + classification[23] := TransitionClass.invalid; (* SYN *) + classification[24] := TransitionClass.invalid; (* ETB *) + classification[25] := TransitionClass.invalid; (* CAN *) + classification[26] := TransitionClass.invalid; (* EM *) + classification[27] := TransitionClass.invalid; (* SUB *) + classification[28] := TransitionClass.invalid; (* ESC *) + classification[29] := TransitionClass.invalid; (* FS *) + classification[30] := TransitionClass.invalid; (* GS *) + classification[31] := TransitionClass.invalid; (* RS *) + classification[32] := TransitionClass.invalid; (* US *) + classification[33] := TransitionClass.space; (* Space *) + classification[34] := TransitionClass.single; (* ! *) + classification[35] := TransitionClass.double_quote; (* " *) + classification[36] := TransitionClass.other; (* # *) + classification[37] := TransitionClass.other; (* $ *) + classification[38] := TransitionClass.single; (* % *) + classification[39] := TransitionClass.single; (* & *) + classification[40] := TransitionClass.single_quote; (* ' *) + classification[41] := TransitionClass.left_paren; (* ( *) + classification[42] := TransitionClass.right_paren; (* ) *) + classification[43] := TransitionClass.asterisk; (* * *) + classification[44] := TransitionClass.single; (* + *) + classification[45] := TransitionClass.single; (* , *) + classification[46] := TransitionClass.minus; (* - *) + classification[47] := TransitionClass.dot; (* . *) + classification[48] := TransitionClass.single; (* / *) + classification[49] := TransitionClass.zero; (* 0 *) + classification[50] := TransitionClass.digit; (* 1 *) + classification[51] := TransitionClass.digit; (* 2 *) + classification[52] := TransitionClass.digit; (* 3 *) + classification[53] := TransitionClass.digit; (* 4 *) + classification[54] := TransitionClass.digit; (* 5 *) + classification[55] := TransitionClass.digit; (* 6 *) + classification[56] := TransitionClass.digit; (* 7 *) + classification[57] := TransitionClass.digit; (* 8 *) + classification[58] := TransitionClass.digit; (* 9 *) + classification[59] := TransitionClass.colon; (* : *) + classification[60] := TransitionClass.single; (* ; *) + classification[61] := TransitionClass.less; (* < *) + classification[62] := TransitionClass.equals; (* = *) + classification[63] := TransitionClass.greater; (* > *) + classification[64] := TransitionClass.other; (* ? *) + classification[65] := TransitionClass.single; (* @ *) + classification[66] := TransitionClass.alpha; (* A *) + classification[67] := TransitionClass.alpha; (* B *) + classification[68] := TransitionClass.alpha; (* C *) + classification[69] := TransitionClass.alpha; (* D *) + classification[70] := TransitionClass.alpha; (* E *) + classification[71] := TransitionClass.alpha; (* F *) + classification[72] := TransitionClass.alpha; (* G *) + classification[73] := TransitionClass.alpha; (* H *) + classification[74] := TransitionClass.alpha; (* I *) + classification[75] := TransitionClass.alpha; (* J *) + classification[76] := TransitionClass.alpha; (* K *) + classification[77] := TransitionClass.alpha; (* L *) + classification[78] := TransitionClass.alpha; (* M *) + classification[79] := TransitionClass.alpha; (* N *) + classification[80] := TransitionClass.alpha; (* O *) + classification[81] := TransitionClass.alpha; (* P *) + classification[82] := TransitionClass.alpha; (* Q *) + classification[83] := TransitionClass.alpha; (* R *) + classification[84] := TransitionClass.alpha; (* S *) + classification[85] := TransitionClass.alpha; (* T *) + classification[86] := TransitionClass.alpha; (* U *) + classification[87] := TransitionClass.alpha; (* V *) + classification[88] := TransitionClass.alpha; (* W *) + classification[89] := TransitionClass.alpha; (* X *) + classification[90] := TransitionClass.alpha; (* Y *) + classification[91] := TransitionClass.alpha; (* Z *) + classification[92] := TransitionClass.single; (* [ *) + classification[93] := TransitionClass.other; (* \ *) + classification[94] := TransitionClass.single; (* ] *) + classification[95] := TransitionClass.single; (* ^ *) + classification[96] := TransitionClass.underscore; (* _ *) + classification[97] := TransitionClass.other; (* ` *) + classification[98] := TransitionClass.hex; (* a *) + classification[99] := TransitionClass.hex; (* b *) + classification[100] := TransitionClass.hex; (* c *) + classification[101] := TransitionClass.hex; (* d *) + classification[102] := TransitionClass.hex; (* e *) + classification[103] := TransitionClass.hex; (* f *) + classification[104] := TransitionClass.alpha; (* g *) + classification[105] := TransitionClass.alpha; (* h *) + classification[106] := TransitionClass.alpha; (* i *) + classification[107] := TransitionClass.alpha; (* j *) + classification[108] := TransitionClass.alpha; (* k *) + classification[109] := TransitionClass.alpha; (* l *) + classification[110] := TransitionClass.alpha; (* m *) + classification[111] := TransitionClass.alpha; (* n *) + classification[112] := TransitionClass.alpha; (* o *) + classification[113] := TransitionClass.alpha; (* p *) + classification[114] := TransitionClass.alpha; (* q *) + classification[115] := TransitionClass.alpha; (* r *) + classification[116] := TransitionClass.alpha; (* s *) + classification[117] := TransitionClass.alpha; (* t *) + classification[118] := TransitionClass.alpha; (* u *) + classification[119] := TransitionClass.alpha; (* v *) + classification[120] := TransitionClass.alpha; (* w *) + classification[121] := TransitionClass.x; (* x *) + classification[122] := TransitionClass.alpha; (* y *) + classification[123] := TransitionClass.alpha; (* z *) + classification[124] := TransitionClass.other; (* { *) + classification[125] := TransitionClass.single; (* | *) + classification[126] := TransitionClass.other; (* } *) + classification[127] := TransitionClass.single; (* ~ *) + classification[128] := TransitionClass.invalid; (* DEL *) + + i := 129u; + while i <= 256u do + classification[i] := TransitionClass.other; + i := i + 1u + end +end; + +proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool; +var + result: Bool; + index: Word; + continue: Bool; +begin + index := 0u; + result := true; + continue := (index < keyword.length) & (token_start.iterator <> token_end); + + while continue & result do + result := keyword[index] = token_start.iterator^ + or cast(tolower(cast(keyword[index]: Int)): Char) = token_start.iterator^; + token_start.iterator := token_start.iterator + 1; + index := index + 1u; + continue := (index < keyword.length) & (token_start.iterator <> token_end) + end; + result := result & index = keyword.length; + + return result & (token_start.iterator = token_end) +end; + +(* Reached the end of file. *) +proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken); +begin + token^.kind := LexerKind.unknown +end; + +proc increment(position: ^BufferPosition); +begin + position^.iterator := position^.iterator + 1 +end; + +(* Add the character to the token currently read and advance to the next character. *) +proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken); +begin + increment(@lexer^.current) +end; + +(* The current character is not a part of the token. Finish the token already + * read. Don't advance to the next character. *) +proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken); +begin + if lexer^.start.iterator^ = ':' then + token^.kind := LexerKind.colon + end; + if lexer^.start.iterator^ = '>' then + token^.kind := LexerKind.greater_than + end; + if lexer^.start.iterator^ = '<' then + token^.kind := LexerKind.less_than + end; + if lexer^.start.iterator^ = '(' then + token^.kind := LexerKind.left_paren + end; + if lexer^.start.iterator^ = '-' then + token^.kind := LexerKind.minus + end; + if lexer^.start.iterator^ = '.' then + token^.kind := LexerKind.dot + end +end; + +(* An action for tokens containing multiple characters. *) +proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken); +begin + if lexer^.start.iterator^ = '<' then + if lexer^.current.iterator^ = '>' then + token^.kind := LexerKind.not_equal + end; + if lexer^.current.iterator^ = '=' then + token^.kind := LexerKind.less_equal + end + end; + if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then + token^.kind := LexerKind.greater_equal + end; + if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then + token^.kind := LexerKind.assignment + end; + if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then + token^.kind := LexerKind.arrow + end; + increment(@lexer^.current) +end; + +(* Skip a space. *) +proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken); +begin + increment(@lexer^.start); + + if lexer^.start.iterator^ = '\n' then + lexer^.start.location.line := lexer^.start.location.line + 1u; + lexer^.start.location.column := 1u + end; + lexer^.current := lexer^.start +end; + +(* Delimited string action. *) +proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken); +var + text_length: Word; +begin + if lexer^.start.iterator^ = '(' then + token^.kind := LexerKind.comment + end; + if lexer^.start.iterator^ = '"' then + text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word); + + token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length); + memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length); + + token^.kind := LexerKind.character + end; + if lexer^.start.iterator^ = '\'' then + text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word); + + token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length); + memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length); + + token^.kind := LexerKind.string + end; + increment(@lexer^.current) +end; + +(* Finalize keyword or identifier. *) +proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken); +begin + token^.kind := LexerKind.identifier; + + token^.value.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char); + memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word)); + + if compare_keyword("program", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._program + end; + if compare_keyword("import", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._import + end; + if compare_keyword("const", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._const + end; + if compare_keyword("var", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._var + end; + if compare_keyword("if", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._if + end; + if compare_keyword("then", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._then + end; + if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._elsif + end; + if compare_keyword("else", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._else + end; + if compare_keyword("while", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._while + end; + if compare_keyword("do", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._do + end; + if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._proc + end; + if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._begin + end; + if compare_keyword("end", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._end + end; + if compare_keyword("type", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._type + end; + if compare_keyword("record", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._record + end; + if compare_keyword("union", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._union + end; + if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind.null + end; + if compare_keyword("or", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._or + end; + if compare_keyword("return", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._return + end; + if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._defer + end; + if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind.to + end; + if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._case + end; + if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._of + end; + if compare_keyword("module", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._module + end; + if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._xor + end; + if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind.boolean; + token^.value.booleanKind := true + end; + if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind.boolean; + token^.value.booleanKind := false + end +end; + +(* Action for tokens containing only one character. The character cannot be + * followed by other characters forming a composite token. *) +proc transition_action_single(lexer: ^Lexer, token: ^LexerToken); +begin + if lexer^.current.iterator^ = '&' then + token^.kind := LexerKind.and + end; + if lexer^.current.iterator^ = ';' then + token^.kind := LexerKind.semicolon + end; + if lexer^.current.iterator^ = ',' then + token^.kind := LexerKind.comma + end; + if lexer^.current.iterator^ = '~' then + token^.kind := LexerKind.not + end; + if lexer^.current.iterator^ = ')' then + token^.kind := LexerKind.right_paren + end; + if lexer^.current.iterator^ = '[' then + token^.kind := LexerKind.left_square + end; + if lexer^.current.iterator^ = ']' then + token^.kind := LexerKind.right_square + end; + if lexer^.current.iterator^ = '^' then + token^.kind := LexerKind.hat + end; + if lexer^.current.iterator^ = '=' then + token^.kind := LexerKind.equal + end; + if lexer^.current.iterator^ = '+' then + token^.kind := LexerKind.plus + end; + if lexer^.current.iterator^ = '*' then + token^.kind := LexerKind.multiplication + end; + if lexer^.current.iterator^ = '/' then + token^.kind := LexerKind.division + end; + if lexer^.current.iterator^ = '%' then + token^.kind := LexerKind.remainder + end; + if lexer^.current.iterator^ = '@' then + token^.kind := LexerKind.at + end; + if lexer^.current.iterator^ = '|' then + token^.kind := LexerKind.pipe + end; + increment(@lexer^.current) +end; + +(* Handle an integer literal. *) +proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken); +var + buffer: String; + integer_length: Word; + found: Bool; +begin + token^.kind := LexerKind.integer; + + integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word); + memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier)); + memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length); + + token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0'; + token^.value.integerKind := atoi(@token^.value.identifierKind[2]) +end; + +proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int; +var + default_transition: Transition; + state_index: Int; +begin + default_transition.action := default_action; + default_transition.next_state := next_state; + state_index := cast(current_state: Int) + 1; + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.digit: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.space: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.colon: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.equals: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.left_paren: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.single: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.hex: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.zero: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.x: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.eof: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.dot: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.minus: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.greater: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.less: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.other: Int) + 1] := default_transition; + + return state_index +end; + +(* + * The transition table describes transitions from one state to another, given + * a symbol (character class). + * + * The table has m rows and n columns, where m is the amount of states and n is + * the amount of classes. So given the current state and a classified character + * the table can be used to look up the next state. + * + * Each cell is a word long. + * - The least significant byte of the word is a row number (beginning with 0). + * It specifies the target state. "ff" means that this is an end state and no + * transition is possible. + * - The next byte is the action that should be performed when transitioning. + * For the meaning of actions see labels in the lex_next function, which + * handles each action. + *) +proc initialize_transitions(); +var + state_index: Int; +begin + (* Start state. *) + state_index := cast(TransitionState.start: Int) + 1; + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.space: Int) + 1].action := transition_action_skip; + transitions[state_index][cast(TransitionClass.space: Int) + 1].next_state := TransitionState.start; + + transitions[state_index][cast(TransitionClass.colon: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.colon: Int) + 1].next_state := TransitionState.colon; + + transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_single; + transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].next_state := TransitionState.left_paren; + + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_single; + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_single; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.single: Int) + 1].action := transition_action_single; + transitions[state_index][cast(TransitionClass.single: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.leading_zero; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := transition_action_eof; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.dot; + + transitions[state_index][cast(TransitionClass.minus: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.minus: Int) + 1].next_state := TransitionState.minus; + + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.character; + + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.string; + + transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.greater; + + transitions[state_index][cast(TransitionClass.less: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.less: Int) + 1].next_state := TransitionState.less; + + transitions[state_index][cast(TransitionClass.other: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.other: Int) + 1].next_state := TransitionState.finish; + + (* Colon state. *) + state_index := set_default_transition(TransitionState.colon, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; + + (* Identifier state. *) + state_index := set_default_transition(TransitionState.identifier, transition_action_key_id, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; + + (* Decimal state. *) + state_index := set_default_transition(TransitionState.decimal, transition_action_integer, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.decimal_suffix; + + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.decimal_suffix; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.decimal; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.decimal_suffix; + + (* Greater state. *) + state_index := set_default_transition(TransitionState.greater, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; + + (* Minus state. *) + state_index := set_default_transition(TransitionState.minus, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; + + (* Left paren state. *) + state_index := set_default_transition(TransitionState.left_paren, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.comment; + + (* Less state. *) + state_index := set_default_transition(TransitionState.less, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; + + (* Hexadecimal after 0x. *) + state_index := set_default_transition(TransitionState.dot, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.finish; + + (* Comment. *) + state_index := set_default_transition(TransitionState.comment, transition_action_accumulate, TransitionState.comment); + + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + (* Closing comment. *) + state_index := set_default_transition(TransitionState.closing_comment, transition_action_accumulate, TransitionState.comment); + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_delimited; + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + (* Character. *) + state_index := set_default_transition(TransitionState.character, transition_action_accumulate, TransitionState.character); + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_delimited; + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.finish; + + (* String. *) + state_index := set_default_transition(TransitionState.string, transition_action_accumulate, TransitionState.string); + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_delimited; + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.finish; + + (* Leading zero. *) + state_index := set_default_transition(TransitionState.leading_zero, transition_action_integer, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish; + + (* Digit with a character suffix. *) + state_index := set_default_transition(TransitionState.decimal_suffix, transition_action_integer, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish +end; + +proc lexer_make*(lexer: ^Lexer, input: ^FILE); +begin + lexer^.input := input; + lexer^.length := 0u; + + lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char); + memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE); + lexer^.size := CHUNK_SIZE +end; + +(* Returns the last read token. *) +proc lexer_current*(lexer: ^Lexer) -> LexerToken; +var + current_class: TransitionClass; + current_state: TransitionState; + current_transition: Transition; + result: LexerToken; + index1: Word; + index2: Word; +begin + lexer^.current := lexer^.start; + current_state := TransitionState.start; + + while current_state <> TransitionState.finish do + index1 := cast(lexer^.current.iterator^: Word) + 1u; + current_class := classification[index1]; + + index1 := cast(current_state: Word) + 1u; + index2 := cast(current_class: Word) + 1u; + + current_transition := transitions[index1][index2]; + if current_transition.action <> nil then + current_transition.action(lexer, @result) + end; + current_state := current_transition.next_state + end; + result.start_location := lexer^.start.location; + result.end_location := lexer^.current.location; + + return result +end; + +(* Read and return the next token. *) +proc lexer_lex*(lexer: ^Lexer) -> LexerToken; +var + result: LexerToken; +begin + if lexer^.length = 0u then + lexer^.length := fread(cast(lexer^.buffer: Pointer), CHUNK_SIZE, 1u, lexer^.input); + lexer^.current.location.column := 1u; + lexer^.current.location.line := 1u; + lexer^.current.iterator := lexer^.buffer + end; + lexer^.start := lexer^.current; + + result := lexer_current(lexer); + return result +end; + +proc lexer_destroy*(lexer: ^Lexer); +begin + free(cast(lexer^.buffer: Pointer)) +end; + +proc lexer_initialize(); +begin + initialize_classification(); + initialize_transitions() +end; + +end. diff --git a/source/main.elna b/source/main.elna new file mode 100644 index 0000000..dae045b --- /dev/null +++ b/source/main.elna @@ -0,0 +1,841 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +program; + +import cstdio, cctype, common, command_line_interface, lexer; + +type + SourceFile* = record + buffer: [1024]Char; + handle: ^FILE; + size: Word; + index: Word + end; + StringBuffer* = record + data: Pointer; + size: Word; + capacity: Word + end; + SourceCode = record + position: TextLocation; + + input: Pointer; + empty: proc(Pointer) -> Bool; + advance: proc(Pointer); + head: proc(Pointer) -> Char + end; + Token* = record + kind: LexerKind; + value: union + int_value: Int; + string: String; + boolean_value: Bool; + char_value: Char + end + end; + Tokenizer* = record + length: Word; + data: ^Token + end; + +(* + Standard procedures. +*) +proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer; + return realloc(ptr, n * size) +end; + +proc substring(string: String, start: Word, count: Word) -> String; + return String(string.ptr + start, count) +end; + +proc open_substring(string: String, start: Word) -> String; + return substring(string, start, string.length - start) +end; + +proc string_dup(origin: String) -> String; +var + copy: ^Char; +begin + copy := cast(malloc(origin.length): ^Char); + strncpy(copy, origin.ptr, origin.length); + + return String(copy, origin.length) +end; + +proc string_buffer_new() -> StringBuffer; +var + result: StringBuffer; +begin + result.capacity := 64u; + result.data := malloc(result.capacity); + result.size := 0u; + + return result +end; + +proc string_buffer_push(buffer: ^StringBuffer, char: Char); +begin + if buffer^.size >= buffer^.capacity then + buffer^.capacity := buffer^.capacity + 1024u; + buffer^.data := realloc(buffer^.data, buffer^.capacity) + end; + cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char); + buffer^.size := buffer^.size + 1u +end; + +proc string_buffer_pop(buffer: ^StringBuffer, count: Word); +begin + buffer^.size := buffer^.size - count +end; + +proc string_buffer_clear(buffer: ^StringBuffer) -> String; +var + result: String; +begin + result := String(cast(buffer^.data: ^Char), buffer^.size); + buffer^.size := 0u; + return result +end; + +(* + Source code stream procedures. +*) + +proc read_source(filename: ^Char) -> ^SourceFile; +var + result: ^SourceFile; + file_handle: ^FILE; +begin + file_handle := fopen(filename, "rb\0".ptr); + + if file_handle <> nil then + result := cast(malloc(#size(SourceFile)): ^SourceFile); + result^.handle := file_handle; + result^.size := 0u; + result^.index := 1u + end; + return result +end; + +proc source_file_empty(source_input: Pointer) -> Bool; +var + source_file: ^SourceFile; +begin + source_file := cast(source_input: ^SourceFile); + + if source_file^.index > source_file^.size then + source_file^.size := fread(cast(@source_file^.buffer: Pointer), 1u, 1024u, source_file^.handle); + source_file^.index := 1u + end; + + return source_file^.size = 0u +end; + +proc source_file_head(source_input: Pointer) -> Char; +var + source_file: ^SourceFile; +begin + source_file := cast(source_input: ^SourceFile); + + return source_file^.buffer[source_file^.index] +end; + +proc source_file_advance(source_input: Pointer); +var + source_file: ^SourceFile; +begin + source_file := cast(source_input: ^SourceFile); + + source_file^.index := source_file^.index + 1u +end; + +proc source_code_empty(source_code: ^SourceCode) -> Bool; + return source_code^.empty(source_code^.input) +end; + +proc source_code_head(source_code: SourceCode) -> Char; + return source_code.head(source_code.input) +end; + +proc source_code_advance(source_code: ^SourceCode); +begin + source_code^.advance(source_code^.input); + source_code^.position.column := source_code^.position.column +end; + +proc source_code_break(source_code: ^SourceCode); +begin + source_code^.position.line := source_code^.position.line + 1u; + source_code^.position.column := 0u +end; + +proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool; + return ~source_code_empty(source_code) & source_code_head(source_code^) = expected +end; + +(* + Token procedures. +*) + +proc lexer_escape(escape: Char, result: ^Char) -> Bool; +var + successful: Bool; +begin + case escape of + 'n': + result^ := '\n'; + successful := true + | 'a': + result^ := '\a'; + successful := true + | 'b': + result^ := '\b'; + successful := true + | 't': + result^ := '\t'; + successful := true + | 'f': + result^ := '\f'; + successful := true + | 'r': + result^ := '\r'; + successful := true + | 'v': + result^ := '\v'; + successful := true + | '\\': + result^ := '\\'; + successful := true + | '\'': + result^ := '\''; + successful := true + | '"': + result^ := '"'; + successful := true + | '?': + result^ := '\?'; + successful := true + | '0': + result^ := '\0'; + successful := true + else + successful := false + end; + return successful +end; + +(* Skip spaces. *) +proc lexer_spaces(source_code: ^SourceCode); +var + current: Char; +begin + while ~source_code_empty(source_code) & isspace(cast(source_code_head(source_code^): Int)) <> 0 do + current := source_code_head(source_code^); + + if current = '\n' then + source_code_break(source_code) + end; + source_code_advance(source_code) + end +end; + +(* Checker whether the character is allowed in an identificator. *) +proc lexer_is_ident(char: Char) -> Bool; + return isalnum(cast(char: Int)) <> 0 or char = '_' +end; + +proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer); +var + content_length: Word; +begin + while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do + string_buffer_push(token_content, source_code_head(source_code^)); + source_code_advance(source_code) + end +end; + +proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; +var + trailing: Word; +begin + trailing := 0u; + + while ~source_code_empty(source_code) & trailing < 2u do + if source_code_head(source_code^) = '*' then + string_buffer_push(token_content, '*'); + trailing := 1u + elsif source_code_head(source_code^) = ')' & trailing = 1u then + string_buffer_pop(token_content, 1u); + trailing := 2u + else + string_buffer_push(token_content, source_code_head(source_code^)); + trailing := 0u + end; + source_code_advance(source_code) + end; + + return trailing = 2u +end; + +proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool; +var + successful: Bool; +begin + successful := ~source_code_empty(source_code); + + if successful then + if source_code_head(source_code^) = '\\' then + source_code_advance(source_code); + + successful := ~source_code_empty(source_code) & lexer_escape(source_code_head(source_code^), token_content) + else + token_content^ := source_code_head(source_code^); + successful := true + end + end; + if successful then + source_code_advance(source_code) + end; + return successful +end; + +proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; +var + token_end, constructed_string: ^Char; + token_length: Word; + is_valid: Bool := true; + next_char: Char; +begin + while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do + is_valid := lexer_character(source_code, @next_char); + + if is_valid then + string_buffer_push(token_content, next_char) + end + end; + + if is_valid & source_code_expect(source_code, '"') then + source_code_advance(source_code) + else + is_valid := false + end; + return is_valid +end; + +proc lexer_number(source_code: ^SourceCode, token_content: ^Int); +begin + token_content^ := 0; + + while ~source_code_empty(source_code) & isdigit(cast(source_code_head(source_code^): Int)) <> 0 do + token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int)); + + source_code_advance(source_code) + end +end; + +(* Categorize an identifier. *) +proc lexer_categorize(token_content: String) -> Token; +var + current_token: Token; +begin + if token_content = "if" then + current_token.kind := LexerKind._if + elsif token_content = "then" then + current_token.kind := LexerKind._then + elsif token_content = "else" then + current_token.kind := LexerKind._else + elsif token_content = "elsif" then + current_token.kind := LexerKind._elsif + elsif token_content = "while" then + current_token.kind := LexerKind._while + elsif token_content = "do" then + current_token.kind := LexerKind._do + elsif token_content = "proc" then + current_token.kind := LexerKind._proc + elsif token_content = "begin" then + current_token.kind := LexerKind._begin + elsif token_content = "end" then + current_token.kind := LexerKind._end + elsif token_content = "extern" then + current_token.kind := LexerKind._extern + elsif token_content = "const" then + current_token.kind := LexerKind._const + elsif token_content = "var" then + current_token.kind := LexerKind._var + elsif token_content = "case" then + current_token.kind := LexerKind._case + elsif token_content = "of" then + current_token.kind := LexerKind._of + elsif token_content = "type" then + current_token.kind := LexerKind._type + elsif token_content = "record" then + current_token.kind := LexerKind._record + elsif token_content = "union" then + current_token.kind := LexerKind._union + elsif token_content = "true" then + current_token.kind := LexerKind.boolean; + current_token.value.boolean_value := true + elsif token_content = "false" then + current_token.kind := LexerKind.boolean; + current_token.value.boolean_value := false + elsif token_content = "nil" then + current_token.kind := LexerKind.null + elsif token_content = "or" then + current_token.kind := LexerKind._or + elsif token_content = "return" then + current_token.kind := LexerKind._return + elsif token_content = "cast" then + current_token.kind := LexerKind._cast + elsif token_content = "defer" then + current_token.kind := LexerKind._defer + elsif token_content = "program" then + current_token.kind := LexerKind._program + elsif token_content = "module" then + current_token.kind := LexerKind._module + elsif token_content = "import" then + current_token.kind := LexerKind._import + else + current_token.kind := LexerKind.identifier; + current_token.value.string := string_dup(token_content) + end; + + return current_token +end; + +proc lexer_add_token(lexer: ^Tokenizer, token: Token); +var + new_length: Word; +begin + new_length := lexer^.length + 1u; + lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token); + (lexer^.data + lexer^.length)^ := token; + lexer^.length := new_length +end; + +(* Read the next token from the input. *) +proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token; +var + current_token: Token; + first_char: Char; +begin + current_token.kind := LexerKind.unknown; + + first_char := source_code_head(source_code); + + if isalpha(cast(first_char: Int)) <> 0 or first_char = '_' then + lexer_identifier(@source_code, token_buffer); + current_token := lexer_categorize(string_buffer_clear(token_buffer)) + elsif first_char = '#' then + source_code_advance(@source_code); + lexer_identifier(@source_code, token_buffer); + + current_token.kind := LexerKind.trait; + current_token.value.string := string_dup(string_buffer_clear(token_buffer)) + elsif isdigit(cast(first_char: Int)) <> 0 then + lexer_number(@source_code, @current_token.value.int_value); + + if source_code_expect(@source_code, 'u') then + current_token.kind := LexerKind.word; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.integer + end + elsif first_char = '(' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.left_paren + elsif source_code_head(source_code) = '*' then + source_code_advance(@source_code); + + if lexer_comment(@source_code, token_buffer) then + current_token.value.string := string_dup(string_buffer_clear(token_buffer)); + current_token.kind := LexerKind.comment + else + current_token.kind := LexerKind.unknown + end + else + current_token.kind := LexerKind.left_paren + end + elsif first_char = ')' then + current_token.kind := LexerKind.right_paren; + source_code_advance(@source_code) + elsif first_char = '\'' then + source_code_advance(@source_code); + + if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then + current_token.kind := LexerKind.character; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.unknown + end + elsif first_char = '"' then + source_code_advance(@source_code); + + if lexer_string(@source_code, token_buffer) then + current_token.kind := LexerKind.string; + current_token.value.string := string_dup(string_buffer_clear(token_buffer)) + else + current_token.kind := LexerKind.unknown + end + elsif first_char = '[' then + current_token.kind := LexerKind.left_square; + source_code_advance(@source_code) + elsif first_char = ']' then + current_token.kind := LexerKind.right_square; + source_code_advance(@source_code) + elsif first_char = '>' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.greater_than + elsif source_code_head(source_code) = '=' then + current_token.kind := LexerKind.greater_equal; + source_code_advance(@source_code) + elsif source_code_head(source_code) = '>' then + current_token.kind := LexerKind.shift_right; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.greater_than + end + elsif first_char = '<' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.less_than + elsif source_code_head(source_code) = '=' then + current_token.kind := LexerKind.less_equal; + source_code_advance(@source_code) + elsif source_code_head(source_code) = '<' then + current_token.kind := LexerKind.shift_left; + source_code_advance(@source_code) + elsif source_code_head(source_code) = '>' then + current_token.kind := LexerKind.not_equal; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.less_than + end + elsif first_char = '=' then + current_token.kind := LexerKind.equal; + source_code_advance(@source_code) + elsif first_char = ';' then + current_token.kind := LexerKind.semicolon; + source_code_advance(@source_code) + elsif first_char = '.' then + current_token.kind := LexerKind.dot; + source_code_advance(@source_code) + elsif first_char = ',' then + current_token.kind := LexerKind.comma; + source_code_advance(@source_code) + elsif first_char = '+' then + current_token.kind := LexerKind.plus; + source_code_advance(@source_code) + elsif first_char = '-' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.minus + elsif source_code_head(source_code) = '>' then + current_token.kind := LexerKind.arrow; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.minus + end + elsif first_char = '*' then + current_token.kind := LexerKind.multiplication; + source_code_advance(@source_code) + elsif first_char = '/' then + current_token.kind := LexerKind.division; + source_code_advance(@source_code) + elsif first_char = '%' then + current_token.kind := LexerKind.remainder; + source_code_advance(@source_code) + elsif first_char = ':' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.colon + elsif source_code_head(source_code) = '=' then + current_token.kind := LexerKind.assignment; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.colon + end + elsif first_char = '^' then + current_token.kind := LexerKind.hat; + source_code_advance(@source_code) + elsif first_char = '@' then + current_token.kind := LexerKind.at; + source_code_advance(@source_code) + elsif first_char = '!' then + current_token.kind := LexerKind.exclamation; + source_code_advance(@source_code) + elsif first_char = '&' then + current_token.kind := LexerKind.and; + source_code_advance(@source_code) + elsif first_char = '~' then + current_token.kind := LexerKind.not; + source_code_advance(@source_code) + elsif first_char = '|' then + current_token.kind := LexerKind.pipe; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.unknown; + source_code_advance(@source_code) + end; + + return current_token +end; + +(* Split the source text into tokens. *) +proc lexer_text(source_code: SourceCode) -> Tokenizer; +var + current_token: Token; + token_buffer: StringBuffer; + lexer: Tokenizer; +begin + lexer := Tokenizer(0u, nil); + token_buffer := string_buffer_new(); + + lexer_spaces(@source_code); + + while ~source_code_empty(@source_code) do + current_token := lexer_next(source_code, @token_buffer); + + if current_token.kind <> LexerKind.unknown then + lexer_add_token(@lexer, current_token); + lexer_spaces(@source_code) + else + write_s("Lexical analysis error on \""); + write_c(source_code_head(source_code)); + write_s("\".\n") + end + end; + + return lexer +end; + +(* + Parser. +*) + +proc parse(tokens: ^Token, tokens_size: Word); +var + current_token: ^Token; + i: Word := 0u; +begin + while i < tokens_size do + current_token := tokens + i; + + case current_token^.kind of + LexerKind._if: + write_s("IF") + | LexerKind._then: + write_s("THEN") + | LexerKind._else: + write_s("ELSE") + | LexerKind._elsif: + write_s("ELSIF") + | LexerKind._while: + write_s("WHILE") + | LexerKind._do: + write_s("DO") + | LexerKind._proc: + write_s("PROC") + | LexerKind._begin: + write_s("BEGIN") + | LexerKind._end: + write_s("END") + | LexerKind._extern: + write_s("EXTERN") + | LexerKind._const: + write_s("CONST") + | LexerKind._var: + write_s("VAR") + | LexerKind._case: + write_s("CASE") + | LexerKind._of: + write_s("OF") + | LexerKind._type: + write_s("TYPE") + | LexerKind._record: + write_s("RECORD") + | LexerKind._union: + write_s("UNION") + | LexerKind.pipe: + write_s("|") + | LexerKind.to: + write_s("TO") + | LexerKind.boolean: + write_s("BOOLEAN<"); + write_b(current_token^.value.boolean_value); + write_c('>') + | LexerKind.null: + write_s("NIL") + | LexerKind.and: + write_s("&") + | LexerKind._or: + write_s("OR") + | LexerKind.not: + write_s("~") + | LexerKind._return: + write_s("RETURN") + | LexerKind._cast: + write_s("CAST") + | LexerKind.shift_left: + write_s("<<") + | LexerKind.shift_right: + write_s(">>") + | LexerKind.identifier: + write_c('<'); + write_s(current_token^.value.string); + write_c('>') + | LexerKind.trait: + write_c('#'); + write_s(current_token^.value.string) + | LexerKind.left_paren: + write_s("(") + | LexerKind.right_paren: + write_s(")") + | LexerKind.left_square: + write_s("[") + | LexerKind.right_square: + write_s("]") + | LexerKind.greater_equal: + write_s(">=") + | LexerKind.less_equal: + write_s("<=") + | LexerKind.greater_than: + write_s(">") + | LexerKind.less_than: + write_s("<") + | LexerKind.equal: + write_s("=") + | LexerKind.not_equal: + write_s("<>") + | LexerKind.semicolon: + write_c(';') + | LexerKind.dot: + write_c('.') + | LexerKind.comma: + write_c(',') + | LexerKind.plus: + write_c('+') + | LexerKind.minus: + write_c('-') + | LexerKind.multiplication: + write_c('*') + | LexerKind.division: + write_c('/') + | LexerKind.remainder: + write_c('%') + | LexerKind.assignment: + write_s(":=") + | LexerKind.colon: + write_c(':') + | LexerKind.hat: + write_c('^') + | LexerKind.at: + write_c('@') + | LexerKind.comment: + write_s("(* COMMENT *)") + | LexerKind.integer: + write_c('<'); + write_i(current_token^.value.int_value); + write_c('>') + | LexerKind.word: + write_c('<'); + write_i(current_token^.value.int_value); + write_s("u>") + | LexerKind.character: + write_c('<'); + write_i(cast(current_token^.value.char_value: Int)); + write_s("c>") + | LexerKind.string: + write_s("\"...\"") + | LexerKind._defer: + write_s("DEFER") + | LexerKind.exclamation: + write_c('!') + | LexerKind.arrow: + write_s("->") + | LexerKind._program: + write_s("PROGRAM") + | LexerKind._module: + write_s("MODULE") + | LexerKind._import: + write_s("IMPORT") + else + write_s("UNKNOWN<"); + write_i(cast(current_token^.kind: Int)); + write_c('>') + end; + write_c(' '); + + i := i + 1u + end; + write_c('\n') +end; + +(* + Compilation entry. +*) + +proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int; +var + return_code: Int := 0; + lexer: Tokenizer; +begin + if command_line^.lex or command_line^.parse then + lexer := lexer_text(source_code) + end; + if command_line^.parse then + parse(lexer.data, lexer.length) + end; + + return return_code +end; + +proc process(argc: Int, argv: ^^Char) -> Int; +var + tokens: ^Token; + tokens_size: Word; + source_code: SourceCode; + command_line: ^CommandLine; + return_code: Int := 0; + source_file: ^SourceFile; +begin + command_line := parse_command_line(argc, argv); + if command_line = nil then + return_code := 2 + end; + + if return_code = 0 then + source_file := read_source(command_line^.input); + + if source_file = nil then + perror(command_line^.input); + return_code := 3 + end + end; + + if return_code = 0 then + defer + fclose(source_file^.handle) + end; + + source_code.position := TextLocation(1u, 1u); + source_code.input := cast(source_file: Pointer); + source_code.empty := source_file_empty; + source_code.head := source_file_head; + source_code.advance := source_file_advance; + + return_code := compile_in_stages(command_line, source_code) + end; + return return_code +end; + + return process(count, parameters) +end.