From cb0e3287090786fad566feb67ac07b8ef361b2c3 Mon Sep 17 00:00:00 2001 From: Aldo Cortesi Date: Tue, 16 Feb 2010 17:09:07 +1300 Subject: [PATCH] Initial checkin. --- .gitignore | 8 + LICENSE | 674 +++++ MANIFEST.in | 5 + README | 38 + doc-src/01-reset-fonts-grids-base.css | 8 + doc-src/02-docstyle.css | 95 + doc-src/_layout.html | 16 + doc-src/admin.html | 14 + doc-src/faq.html | 17 + doc-src/index.html | 3 + doc-src/index.py | 26 + doc-src/library.html | 15 + doc-src/syntax.css | 120 + examples/stickycookies.py | 35 + libmproxy/__init__.py | 0 libmproxy/console.py | 1065 +++++++ libmproxy/controller.py | 119 + libmproxy/filt.py | 316 +++ libmproxy/proxy.py | 374 +++ libmproxy/pyparsing.py | 3707 +++++++++++++++++++++++++ libmproxy/resources/bogus_template | 11 + libmproxy/utils.py | 277 ++ mitmproxy | 70 + setup.py | 97 + test/.pry | 5 + test/data/serverkey.pem | 32 + test/data/testkey.pem | 32 + test/handler.py | 25 + test/serv.py | 10 + test/sslserv.py | 22 + test/test_console.py | 269 ++ test/test_filt.py | 220 ++ test/test_proxy.py | 259 ++ test/test_utils.py | 221 ++ test/tserv | 30 + todo | 17 + 36 files changed, 8252 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README create mode 100644 doc-src/01-reset-fonts-grids-base.css create mode 100644 doc-src/02-docstyle.css create mode 100644 doc-src/_layout.html create mode 100644 doc-src/admin.html create mode 100644 doc-src/faq.html create mode 100644 doc-src/index.html create mode 100644 doc-src/index.py create mode 100644 doc-src/library.html create mode 100644 doc-src/syntax.css create mode 100644 examples/stickycookies.py create mode 100644 libmproxy/__init__.py create mode 100644 libmproxy/console.py create mode 100644 libmproxy/controller.py create mode 100644 libmproxy/filt.py create mode 100644 libmproxy/proxy.py create mode 100644 libmproxy/pyparsing.py create mode 100644 libmproxy/resources/bogus_template create mode 100644 
libmproxy/utils.py create mode 100755 mitmproxy create mode 100644 setup.py create mode 100644 test/.pry create mode 100644 test/data/serverkey.pem create mode 100644 test/data/testkey.pem create mode 100644 test/handler.py create mode 100644 test/serv.py create mode 100644 test/sslserv.py create mode 100644 test/test_console.py create mode 100644 test/test_filt.py create mode 100644 test/test_proxy.py create mode 100644 test/test_utils.py create mode 100755 test/tserv create mode 100644 todo diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..2d49315bc --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +MANIFEST +/build +/dist +/tmp +/doc +*.py[cd] +*.swp +*.swo diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..94a9ed024 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..beffe8856 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,5 @@ +include LICENSE +recursive-include doc * +recursive-include test * +recursive-include libmproxy/resources * +recursive-exclude test *.swo *.swp *.pyc diff --git a/README b/README new file mode 100644 index 000000000..3096277fe --- /dev/null +++ b/README @@ -0,0 +1,38 @@ +mitmproxy is an interactive SSL-capable intercepting HTTP proxy. It lets you +observe, modify and replay requests and responses on the fly. The underlying +library that mitmproxy is built on can also be used to do these things +programmatically. + +By default, mitmproxy starts up with a mutt-like interactive curses interface - +the help page (which you can view by pressing "?") should tell you everything +you need to know. Note that requests and responses are stored in-memory until +you delete them, so leaving mitmproxy running indefinitely or requesting very +large amounts of data through it is a bad idea. + +mitmproxy intercepts SSL requests by simply assuming that all CONNECT requests +are https. The connection from the browser is wrapped in SSL, and we read the +request by pretending to be the connecting server. We then open an SSL request +to the destination server, and replay the request. + +Releases can be found here: http://corte.si/software + +Source is hosted here: http://github.com/cortesi/mitmproxy + + +Requirements +------------ + +* The curses interface relies on a current version of the +[urwid](http://excess.org/urwid/) library. +* The test suite uses the [pry](http://github.com/cortesi/pry) unit testing + library. + +You should also make sure that your console environment is set up with the +following: + +* EDITOR environment variable to determine the external editor. +* PAGER environment variable to determine the external pager. +* Appropriate entries in your mailcap files to determine external + viewers for request and response contents. 
+ + diff --git a/doc-src/01-reset-fonts-grids-base.css b/doc-src/01-reset-fonts-grids-base.css new file mode 100644 index 000000000..32f5f8bf3 --- /dev/null +++ b/doc-src/01-reset-fonts-grids-base.css @@ -0,0 +1,8 @@ +/* +Copyright (c) 2009, Yahoo! Inc. All rights reserved. +Code licensed under the BSD License: +http://developer.yahoo.net/yui/license.txt +version: 2.7.0 +*/ +html{color:#000;background:#FFF;}body,div,dl,dt,dd,ul,ol,li,h1,h2,h3,h4,h5,h6,pre,code,form,fieldset,legend,input,button,textarea,p,blockquote,th,td{margin:0;padding:0;}table{border-collapse:collapse;border-spacing:0;}fieldset,img{border:0;}address,caption,cite,code,dfn,em,strong,th,var,optgroup{font-style:inherit;font-weight:inherit;}del,ins{text-decoration:none;}li{list-style:none;}caption,th{text-align:left;}h1,h2,h3,h4,h5,h6{font-size:100%;font-weight:normal;}q:before,q:after{content:'';}abbr,acronym{border:0;font-variant:normal;}sup{vertical-align:baseline;}sub{vertical-align:baseline;}legend{color:#000;}input,button,textarea,select,optgroup,option{font-family:inherit;font-size:inherit;font-style:inherit;font-weight:inherit;}input,button,textarea,select{*font-size:100%;}body{font:13px/1.231 arial,helvetica,clean,sans-serif;*font-size:small;*font:x-small;}select,input,button,textarea,button{font:99% arial,helvetica,clean,sans-serif;}table{font-size:inherit;font:100%;}pre,code,kbd,samp,tt{font-family:monospace;*font-size:108%;line-height:100%;}body{text-align:center;}#doc,#doc2,#doc3,#doc4,.yui-t1,.yui-t2,.yui-t3,.yui-t4,.yui-t5,.yui-t6,.yui-t7{margin:auto;text-align:left;width:57.69em;*width:56.25em;}#doc2{width:73.076em;*width:71.25em;}#doc3{margin:auto 10px;width:auto;}#doc4{width:74.923em;*width:73.05em;}.yui-b{position:relative;}.yui-b{_position:static;}#yui-main .yui-b{position:static;}#yui-main,.yui-g .yui-u .yui-g{width:100%;}.yui-t1 #yui-main,.yui-t2 #yui-main,.yui-t3 #yui-main{float:right;margin-left:-25em;}.yui-t4 #yui-main,.yui-t5 #yui-main,.yui-t6 
#yui-main{float:left;margin-right:-25em;}.yui-t1 .yui-b{float:left;width:12.30769em;*width:12.00em;}.yui-t1 #yui-main .yui-b{margin-left:13.30769em;*margin-left:13.05em;}.yui-t2 .yui-b{float:left;width:13.8461em;*width:13.50em;}.yui-t2 #yui-main .yui-b{margin-left:14.8461em;*margin-left:14.55em;}.yui-t3 .yui-b{float:left;width:23.0769em;*width:22.50em;}.yui-t3 #yui-main .yui-b{margin-left:24.0769em;*margin-left:23.62em;}.yui-t4 .yui-b{float:right;width:13.8456em;*width:13.50em;}.yui-t4 #yui-main .yui-b{margin-right:14.8456em;*margin-right:14.55em;}.yui-t5 .yui-b{float:right;width:18.4615em;*width:18.00em;}.yui-t5 #yui-main .yui-b{margin-right:19.4615em;*margin-right:19.125em;}.yui-t6 .yui-b{float:right;width:23.0769em;*width:22.50em;}.yui-t6 #yui-main .yui-b{margin-right:24.0769em;*margin-right:23.62em;}.yui-t7 #yui-main .yui-b{display:block;margin:0 0 1em 0;}#yui-main .yui-b{float:none;width:auto;}.yui-gb .yui-u,.yui-g .yui-gb .yui-u,.yui-gb .yui-g,.yui-gb .yui-gb,.yui-gb .yui-gc,.yui-gb .yui-gd,.yui-gb .yui-ge,.yui-gb .yui-gf,.yui-gc .yui-u,.yui-gc .yui-g,.yui-gd .yui-u{float:left;}.yui-g .yui-u,.yui-g .yui-g,.yui-g .yui-gb,.yui-g .yui-gc,.yui-g .yui-gd,.yui-g .yui-ge,.yui-g .yui-gf,.yui-gc .yui-u,.yui-gd .yui-g,.yui-g .yui-gc .yui-u,.yui-ge .yui-u,.yui-ge .yui-g,.yui-gf .yui-g,.yui-gf .yui-u{float:right;}.yui-g div.first,.yui-gb div.first,.yui-gc div.first,.yui-gd div.first,.yui-ge div.first,.yui-gf div.first,.yui-g .yui-gc div.first,.yui-g .yui-ge div.first,.yui-gc div.first div.first{float:left;}.yui-g .yui-u,.yui-g .yui-g,.yui-g .yui-gb,.yui-g .yui-gc,.yui-g .yui-gd,.yui-g .yui-ge,.yui-g .yui-gf{width:49.1%;}.yui-gb .yui-u,.yui-g .yui-gb .yui-u,.yui-gb .yui-g,.yui-gb .yui-gb,.yui-gb .yui-gc,.yui-gb .yui-gd,.yui-gb .yui-ge,.yui-gb .yui-gf,.yui-gc .yui-u,.yui-gc .yui-g,.yui-gd .yui-u{width:32%;margin-left:1.99%;}.yui-gb .yui-u{*margin-left:1.9%;*width:31.9%;}.yui-gc div.first,.yui-gd .yui-u{width:66%;}.yui-gd div.first{width:32%;}.yui-ge div.first,.yui-gf 
.yui-u{width:74.2%;}.yui-ge .yui-u,.yui-gf div.first{width:24%;}.yui-g .yui-gb div.first,.yui-gb div.first,.yui-gc div.first,.yui-gd div.first{margin-left:0;}.yui-g .yui-g .yui-u,.yui-gb .yui-g .yui-u,.yui-gc .yui-g .yui-u,.yui-gd .yui-g .yui-u,.yui-ge .yui-g .yui-u,.yui-gf .yui-g .yui-u{width:49%;*width:48.1%;*margin-left:0;}.yui-g .yui-g .yui-u{width:48.1%;}.yui-g .yui-gb div.first,.yui-gb .yui-gb div.first{*margin-right:0;*width:32%;_width:31.7%;}.yui-g .yui-gc div.first,.yui-gd .yui-g{width:66%;}.yui-gb .yui-g div.first{*margin-right:4%;_margin-right:1.3%;}.yui-gb .yui-gc div.first,.yui-gb .yui-gd div.first{*margin-right:0;}.yui-gb .yui-gb .yui-u,.yui-gb .yui-gc .yui-u{*margin-left:1.8%;_margin-left:4%;}.yui-g .yui-gb .yui-u{_margin-left:1.0%;}.yui-gb .yui-gd .yui-u{*width:66%;_width:61.2%;}.yui-gb .yui-gd div.first{*width:31%;_width:29.5%;}.yui-g .yui-gc .yui-u,.yui-gb .yui-gc .yui-u{width:32%;_float:right;margin-right:0;_margin-left:0;}.yui-gb .yui-gc div.first{width:66%;*float:left;*margin-left:0;}.yui-gb .yui-ge .yui-u,.yui-gb .yui-gf .yui-u{margin:0;}.yui-gb .yui-gb .yui-u{_margin-left:.7%;}.yui-gb .yui-g div.first,.yui-gb .yui-gb div.first{*margin-left:0;}.yui-gc .yui-g .yui-u,.yui-gd .yui-g .yui-u{*width:48.1%;*margin-left:0;}.yui-gb .yui-gd div.first{width:32%;}.yui-g .yui-gd div.first{_width:29.9%;}.yui-ge .yui-g{width:24%;}.yui-gf .yui-g{width:74.2%;}.yui-gb .yui-ge div.yui-u,.yui-gb .yui-gf div.yui-u{float:right;}.yui-gb .yui-ge div.first,.yui-gb .yui-gf div.first{float:left;}.yui-gb .yui-ge .yui-u,.yui-gb .yui-gf div.first{*width:24%;_width:20%;}.yui-gb .yui-ge div.first,.yui-gb .yui-gf .yui-u{*width:73.5%;_width:65.5%;}.yui-ge div.first .yui-gd .yui-u{width:65%;}.yui-ge div.first .yui-gd 
div.first{width:32%;}#hd:after,#bd:after,#ft:after,.yui-g:after,.yui-gb:after,.yui-gc:after,.yui-gd:after,.yui-ge:after,.yui-gf:after{content:".";display:block;height:0;clear:both;visibility:hidden;}#hd,#bd,#ft,.yui-g,.yui-gb,.yui-gc,.yui-gd,.yui-ge,.yui-gf{zoom:1;} +body{margin:10px;}h1{font-size:138.5%;}h2{font-size:123.1%;}h3{font-size:108%;}h1,h2,h3{margin:1em 0;}h1,h2,h3,h4,h5,h6,strong,dt{font-weight:bold;}optgroup{font-weight:normal;}abbr,acronym{border-bottom:1px dotted #000;cursor:help;}em{font-style:italic;}del{text-decoration:line-through;}blockquote,ul,ol,dl{margin:1em;}ol,ul,dl{margin-left:2em;}ol li{list-style:decimal outside;}ul li{list-style:disc outside;}dl dd{margin-left:1em;}th,td{border:1px solid #000;padding:.5em;}th{font-weight:bold;text-align:center;}caption{margin-bottom:.5em;text-align:center;}sup{vertical-align:super;}sub{vertical-align:sub;}p,fieldset,table,pre{margin-bottom:1em;}button,input[type="checkbox"],input[type="radio"],input[type="reset"],input[type="submit"]{padding:1px;} diff --git a/doc-src/02-docstyle.css b/doc-src/02-docstyle.css new file mode 100644 index 000000000..7d33b8615 --- /dev/null +++ b/doc-src/02-docstyle.css @@ -0,0 +1,95 @@ +body { + -x-system-font:none; + font-family: Helvetica,Arial,Tahoma,Verdana,Sans-Serif; + color: #555555; + font-size: 1.3em; +} + +a { + color: #3F8ED8; +} + +#hd { + margin: 0; + border-bottom: 1px solid #999; +} +#hd h1 { + letter-spacing: 3px; + font-size: 2.5em; + line-height: 100%; + margin: 0.3em 0; + font-weight: normal; +} + +#bd { + padding: 20px; +} + +#bd h1 { + font-size: 1.6em; + margin-top: 5px; + margin-bottom: 5px; +} + +#bd h2 { + font-size: 1.2em; + margin-top: 5px; + margin-bottom: 5px; +} + +#ft { + color: #aaa; + border-top: 1px solid #aaa; + clear: both; + margin: 0 0 2em 0; + font-size: 0.8em; + letter-spacing: 0.5px; +} + +.pageindex { + font-size: 1.5em; +} + +.pageindex ul { + list-style-image:none; + list-style-position:outside; + list-style-type:none; + margin: 
0px; +} + +.pageindex li { + list-style-image:none; + list-style-position:outside; + list-style-type:none; + margin: 0; +} + +.pageindex li.active { + padding-left: 4px; + border-left: 5px solid #ff0000; +} + +.pageindex li.inactive{ + border-left: none; + margin-left: 9px; +} + +.pageindex li li a { + display: block; + background-color: transparent; + margin: 0; + border-top: none; + border-bottom: none; +} + +.pageindex ul ul { + margin-left: 20px; + padding: 0; + list-style-type: none; +} + + +.faq .question { + font-size: 1.1em; + font-weight: bold; +} diff --git a/doc-src/_layout.html b/doc-src/_layout.html new file mode 100644 index 000000000..2e706ecd0 --- /dev/null +++ b/doc-src/_layout.html @@ -0,0 +1,16 @@ +
+
+ $!head!$ +
+
+
+
$!body!$
+
+
+
@!sidebar!@
+
+
+
+

@!copyright!@

+
+
diff --git a/doc-src/admin.html b/doc-src/admin.html new file mode 100644 index 000000000..6954010cf --- /dev/null +++ b/doc-src/admin.html @@ -0,0 +1,14 @@ + +

Contact

+ +

Please send any comments, suggestions and bug reports to + $!docMaintainerEmail!$. +

+ + +

License

+ +
+@!license!@
+
+ diff --git a/doc-src/faq.html b/doc-src/faq.html new file mode 100644 index 000000000..8b0a3ff5f --- /dev/null +++ b/doc-src/faq.html @@ -0,0 +1,17 @@ + +
+ +

On some sites I see a lot of "Connection from.." + entries that never complete.

+ +

This is probably because the page requests resources from SSL-protected + domains. These requests are intercepted by mitmproxy, but because we're + using a bogus certificate, the browser-side of the connection hangs. The + browser doesn't prompt you to add a certificate trust exception for remote + page components, only for the primary domain being visited.

+ +

To solve this, use something like FireBug to find out which page + components are hanging. Visit the relevant domains using your browser, and + add a certificate trust exception for each one.

+ +
diff --git a/doc-src/index.html b/doc-src/index.html new file mode 100644 index 000000000..859ffad07 --- /dev/null +++ b/doc-src/index.html @@ -0,0 +1,3 @@ + +@!index_contents!@ + diff --git a/doc-src/index.py b/doc-src/index.py new file mode 100644 index 000000000..2b6dde6a0 --- /dev/null +++ b/doc-src/index.py @@ -0,0 +1,26 @@ +import countershape +from countershape import Page, Directory, PythonModule +import countershape.grok + +this.layout = countershape.Layout("_layout.html") +this.markup = "markdown" +ns.docTitle = "mitmproxy" +ns.docMaintainer = "Aldo Cortesi" +ns.docMaintainerEmail = "aldo@corte.si" +ns.copyright = "Aldo Cortesi 2010" +ns.head = countershape.template.Template(None, "

@!docTitle!@ - @!this.title!@

") +ns.sidebar = countershape.widgets.SiblingPageIndex( + '/index.html', + exclude=['countershape'] + ) + +ns.license = file("../LICENSE").read() +ns.index_contents = file("../README").read() +ns.example = file("../examples/stickycookies.py").read() + +pages = [ + Page("index.html", "introduction"), + Page("library.html", "library"), + Page("faq.html", "faq"), + Page("admin.html", "administrivia") +] diff --git a/doc-src/library.html b/doc-src/library.html new file mode 100644 index 000000000..e8533731e --- /dev/null +++ b/doc-src/library.html @@ -0,0 +1,15 @@ + +All of mitmproxy's basic functionality is exposed through the __libmproxy__ +library. The example below shows a simple implementation of the "sticky cookie" +functionality included in the interactive mitmproxy program. Traffic is +monitored for __cookie__ and __set-cookie__ headers, and requests are rewritten +to include a previously seen cookie if they don't already have one. In effect, +this lets you log in to a site using your browser, and then make subsequent +requests using a tool like __curl__, which will then seem to be part of the +authenticated session. 
+ + + +$!example!$ + + diff --git a/doc-src/syntax.css b/doc-src/syntax.css new file mode 100644 index 000000000..e371658ab --- /dev/null +++ b/doc-src/syntax.css @@ -0,0 +1,120 @@ +.highlight { background: #f8f8f8; } +.highlight .c { color: #408080; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #008000; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #BC7A00 } /* Comment.Preproc */ +.highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #408080; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #808080 } /* Generic.Output */ +.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0040D0 } /* Generic.Traceback */ +.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +.highlight .kp { color: #008000 } /* Keyword.Pseudo */ +.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #B00040 } /* Keyword.Type */ +.highlight .m { color: #666666 } /* Literal.Number */ +.highlight .s { color: #BA2121 } /* Literal.String */ +.highlight .na { color: #7D9029 } /* Name.Attribute */ +.highlight .nb { color: #008000 } /* Name.Builtin */ +.highlight .nc { color: #0000FF; font-weight: bold } 
/* Name.Class */ +.highlight .no { color: #880000 } /* Name.Constant */ +.highlight .nd { color: #AA22FF } /* Name.Decorator */ +.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #0000FF } /* Name.Function */ +.highlight .nl { color: #A0A000 } /* Name.Label */ +.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #19177C } /* Name.Variable */ +.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mf { color: #666666 } /* Literal.Number.Float */ +.highlight .mh { color: #666666 } /* Literal.Number.Hex */ +.highlight .mi { color: #666666 } /* Literal.Number.Integer */ +.highlight .mo { color: #666666 } /* Literal.Number.Oct */ +.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +.highlight .sc { color: #BA2121 } /* Literal.String.Char */ +.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +.highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +.highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +.highlight .sx { color: #008000 } /* Literal.String.Other */ +.highlight .sr { color: #BB6688 } /* Literal.String.Regex */ +.highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +.highlight .ss { color: #19177C } /* Literal.String.Symbol */ +.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +.highlight .vc { color: #19177C } /* Name.Variable.Class */ +.highlight .vg { color: #19177C } /* Name.Variable.Global */ +.highlight .vi { color: #19177C } /* Name.Variable.Instance */ +.highlight .il { color: #666666 } /* 
Literal.Number.Integer.Long */ +.grokdoc { background: #f8f8f8; } +.grokdoc .c { color: #408080; font-style: italic } /* Comment */ +.grokdoc .err { border: 1px solid #FF0000 } /* Error */ +.grokdoc .k { color: #008000; font-weight: bold } /* Keyword */ +.grokdoc .o { color: #666666 } /* Operator */ +.grokdoc .cm { color: #408080; font-style: italic } /* Comment.Multiline */ +.grokdoc .cp { color: #BC7A00 } /* Comment.Preproc */ +.grokdoc .c1 { color: #408080; font-style: italic } /* Comment.Single */ +.grokdoc .cs { color: #408080; font-style: italic } /* Comment.Special */ +.grokdoc .gd { color: #A00000 } /* Generic.Deleted */ +.grokdoc .ge { font-style: italic } /* Generic.Emph */ +.grokdoc .gr { color: #FF0000 } /* Generic.Error */ +.grokdoc .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.grokdoc .gi { color: #00A000 } /* Generic.Inserted */ +.grokdoc .go { color: #808080 } /* Generic.Output */ +.grokdoc .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.grokdoc .gs { font-weight: bold } /* Generic.Strong */ +.grokdoc .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.grokdoc .gt { color: #0040D0 } /* Generic.Traceback */ +.grokdoc .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +.grokdoc .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +.grokdoc .kp { color: #008000 } /* Keyword.Pseudo */ +.grokdoc .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +.grokdoc .kt { color: #B00040 } /* Keyword.Type */ +.grokdoc .m { color: #666666 } /* Literal.Number */ +.grokdoc .s { color: #BA2121 } /* Literal.String */ +.grokdoc .na { color: #7D9029 } /* Name.Attribute */ +.grokdoc .nb { color: #008000 } /* Name.Builtin */ +.grokdoc .nc { color: #0000FF; font-weight: bold } /* Name.Class */ +.grokdoc .no { color: #880000 } /* Name.Constant */ +.grokdoc .nd { color: #AA22FF } /* Name.Decorator */ +.grokdoc .ni { color: #999999; font-weight: bold } /* Name.Entity */ +.grokdoc .ne 
{ color: #D2413A; font-weight: bold } /* Name.Exception */ +.grokdoc .nf { color: #0000FF } /* Name.Function */ +.grokdoc .nl { color: #A0A000 } /* Name.Label */ +.grokdoc .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +.grokdoc .nt { color: #008000; font-weight: bold } /* Name.Tag */ +.grokdoc .nv { color: #19177C } /* Name.Variable */ +.grokdoc .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.grokdoc .w { color: #bbbbbb } /* Text.Whitespace */ +.grokdoc .mf { color: #666666 } /* Literal.Number.Float */ +.grokdoc .mh { color: #666666 } /* Literal.Number.Hex */ +.grokdoc .mi { color: #666666 } /* Literal.Number.Integer */ +.grokdoc .mo { color: #666666 } /* Literal.Number.Oct */ +.grokdoc .sb { color: #BA2121 } /* Literal.String.Backtick */ +.grokdoc .sc { color: #BA2121 } /* Literal.String.Char */ +.grokdoc .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +.grokdoc .s2 { color: #BA2121 } /* Literal.String.Double */ +.grokdoc .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +.grokdoc .sh { color: #BA2121 } /* Literal.String.Heredoc */ +.grokdoc .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +.grokdoc .sx { color: #008000 } /* Literal.String.Other */ +.grokdoc .sr { color: #BB6688 } /* Literal.String.Regex */ +.grokdoc .s1 { color: #BA2121 } /* Literal.String.Single */ +.grokdoc .ss { color: #19177C } /* Literal.String.Symbol */ +.grokdoc .bp { color: #008000 } /* Name.Builtin.Pseudo */ +.grokdoc .vc { color: #19177C } /* Name.Variable.Class */ +.grokdoc .vg { color: #19177C } /* Name.Variable.Global */ +.grokdoc .vi { color: #19177C } /* Name.Variable.Instance */ +.grokdoc .il { color: #666666 } /* Literal.Number.Integer.Long */ diff --git a/examples/stickycookies.py b/examples/stickycookies.py new file mode 100644 index 000000000..94b358760 --- /dev/null +++ b/examples/stickycookies.py @@ -0,0 +1,35 @@ +from libmproxy import controller, proxy + +proxy.config = 
proxy.Config( + "~/.mitmproxy/cert.pem" +) + +class StickyMaster(controller.Master): + def __init__(self, server): + controller.Master.__init__(self, server) + self.stickyhosts = {} + + def run(self): + try: + return controller.Master.run(self) + except KeyboardInterrupt: + self.shutdown() + + def handle_request(self, msg): + hid = (msg.host, msg.port) + if msg.headers.has_key("cookie"): + self.stickyhosts[hid] = msg.headers["cookie"] + elif hid in self.stickyhosts: + msg.headers["cookie"] = self.stickyhosts[hid] + msg.ack() + + def handle_response(self, msg): + hid = (msg.request.host, msg.request.port) + if msg.headers.has_key("set-cookie"): + self.stickyhosts[hid] = msg.headers["set-cookie"] + msg.ack() + + +server = proxy.ProxyServer(8080) +m = StickyMaster(server) +m.run() diff --git a/libmproxy/__init__.py b/libmproxy/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/libmproxy/console.py b/libmproxy/console.py new file mode 100644 index 000000000..7a96ec38a --- /dev/null +++ b/libmproxy/console.py @@ -0,0 +1,1065 @@ +# Copyright (C) 2010 Aldo Cortesi +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import Queue, mailcap, mimetypes, tempfile, os, subprocess, threading +import cStringIO +import urwid.curses_display +import urwid +import controller, utils, filt, proxy + + +class Stop(Exception): pass + + +def format_keyvals(lst, key="key", val="text", space=5, indent=0): + ret = [] + if lst: + pad = max(len(i[0]) for i in lst) + space + for i in lst: + ret.extend( + [ + " "*indent, + (key, i[0]), + " "*(pad-len(i[0])), + (val, i[1]), + "\n" + ] + ) + return ret + + +#begin nocover + +class ReplayThread(threading.Thread): + def __init__(self, flow, masterq): + self.flow, self.masterq = flow, masterq + threading.Thread.__init__(self) + + def run(self): + try: + server = proxy.ServerConnection(self.flow.request) + response = server.read_response() + response.send(self.masterq) + except proxy.ProxyError, v: + err = proxy.Error(self.flow.connection, v.msg) + err.send(self.masterq) + + +class ConnectionItem(urwid.WidgetWrap): + def __init__(self, master, state, flow): + self.master, self.state, self.flow = master, state, flow + w = self.get_text() + urwid.WidgetWrap.__init__(self, w) + + def intercept(self): + self.intercepting = True + self.w = self.get_text() + + def get_text(self, nofocus=False): + return urwid.Text(self.flow.get_text(nofocus)) + + def selectable(self): + return True + + def keypress(self, (maxcol,), key): + if key == "a": + self.flow.accept_intercept() + self.master.sync_list_view() + elif key == "d": + if not self.state.delete_flow(self.flow): + self.master.statusbar.message("Can't delete connection mid-intercept.") + self.master.sync_list_view() + elif key == "r": + r = self.state.replay(self.flow, self.master.masterq) + if r: + self.master.statusbar.message(r) + self.master.sync_list_view() + elif key == "R": + self.state.revert(self.flow) + self.master.sync_list_view() + elif key == "z": + self.master.kill_connection(self.flow) + elif key == "enter": + if self.flow.request: + self.master.view_connection(self.flow) + return key + + +class 
ConnectionListView(urwid.ListWalker): + def __init__(self, master, state): + self.master, self.state = master, state + + def get_focus(self): + f, i = self.state.get_focus() + f = ConnectionItem(self.master, self.state, f) if f else None + return f, i + + def set_focus(self, focus): + ret = self.state.set_focus(focus) + self._modified() + return ret + + def get_next(self, pos): + f, i = self.state.get_next(pos) + f = ConnectionItem(self.master, self.state, f) if f else None + return f, i + + def get_prev(self, pos): + f, i = self.state.get_prev(pos) + f = ConnectionItem(self.master, self.state, f) if f else None + return f, i + + +class ConnectionViewHeader(urwid.WidgetWrap): + def __init__(self, flow): + self.flow = flow + self.w = urwid.Text(flow.get_text(nofocus=True, padding=0)) + + def refresh_connection(self, f): + if f == self.flow: + self.w = urwid.Text(f.get_text(nofocus=True, padding=0)) + + +class ConnectionView(urwid.WidgetWrap): + REQ = 0 + RESP = 1 + tabs = ["Request", "Response"] + methods = [ + ("get", "g"), + ("post", "p"), + ("put", "u"), + ("head", "h"), + ("trace", "t"), + ("delete", "d"), + ("options", "o"), + ] + def __init__(self, master, state, flow): + self.master, self.state, self.flow = master, state, flow + self.binary = False + self.view_request() + + def _tab(self, content, active): + if active: + attr = "heading" + else: + attr = "inactive" + p = urwid.Text(content) + p = urwid.Padding(p, align="left", width=("relative", 100)) + p = urwid.AttrWrap(p, attr) + return p + + def wrap_body(self, active, body): + parts = [] + + if self.flow.intercepting and not self.flow.request.acked: + qt = "Request (intercepted)" + else: + qt = "Request" + if active == self.REQ: + parts.append(self._tab(qt, True)) + else: + parts.append(self._tab(qt, False)) + + if self.flow.response: + if self.flow.intercepting and not self.flow.response.acked: + st = "Response (intercepted)" + else: + st = "Response" + if active == self.RESP: + 
    def _conn_text(self, conn):
        """
        Build the scrollable body widget for one side of the flow (conn is
        the request or the response): headers first, then the content as a
        hexdump or as plain text lines.
        """
        txt = []
        # Headers via the shared key/value formatter, sorted by name.
        txt.extend(
            format_keyvals(
                [(h+":", v) for (h, v) in sorted(conn.headers.itemPairs())],
                key = "header",
                val = "text"
            )
        )
        txt.append("\n\n")
        if conn.content:
            # Hexdump when the user toggled binary mode ("b") or the
            # content looks binary according to utils.isBin.
            if self.binary or utils.isBin(conn.content):
                for offset, hex, s in utils.hexdump(conn.content):
                    txt.extend([
                        ("offset", offset),
                        " ",
                        ("text", hex),
                        " ",
                        ("text", s),
                        "\n"
                    ])
            else:
                # Plain text: one markup tuple per line, newlines restored
                # explicitly since splitlines() strips them.
                for i in conn.content.splitlines():
                    txt.append(
                        ("text", i),
                    )
                    txt.append(
                        ("text", "\n"),
                    )
        return urwid.ListBox([urwid.Text(txt)])
+ self.master.ui._curs_set(1) + self.master.ui.clear() + data = open(name).read() + os.unlink(name) + return data + + def edit_method(self, m): + for i in self.methods: + if i[1] == m: + self.flow.request.method = i[0].upper() + self.master.refresh_connection(self.flow) + + def edit(self, part): + if self.viewing == self.REQ: + conn = self.flow.request + else: + conn = self.flow.response + if part == "b": + conn.content = self._spawn_editor(conn.content or "") + elif part == "h": + headertext = self._spawn_editor(repr(conn.headers)) + headers = utils.Headers() + fp = cStringIO.StringIO(headertext) + headers.read(fp) + conn.headers = headers + elif part == "u" and self.viewing == self.REQ: + conn = self.flow.request + url = self._spawn_editor(conn.url()) + url = url.strip() + if not conn.set_url(url): + return "Invalid URL." + elif part == "m" and self.viewing == self.REQ: + self.master.prompt_onekey("Method ", self.methods, self.edit_method) + key = None + self.master.refresh_connection(self.flow) + + def keypress(self, size, key): + if key == "tab": + if self.viewing == self.REQ: + self.view_response() + else: + self.view_request() + elif key in ("up", "down", "page up", "page down"): + # Why doesn't this just work?? 
+ self.w.body.keypress(size, key) + elif key == "a": + self.flow.accept_intercept() + self.master.view_connection(self.flow) + elif key == "b": + self.binary = not self.binary + self.master.refresh_connection(self.flow) + elif key == "e": + if self.viewing == self.REQ: + self.master.prompt_onekey( + "Edit request ", + ( + ("header", "h"), + ("body", "b"), + ("url", "u"), + ("method", "m") + ), + self.edit + ) + else: + self.master.prompt_onekey( + "Edit response ", + ( + ("header", "h"), + ("body", "b"), + ), + self.edit + ) + key = None + elif key == "r": + r = self.state.replay(self.flow, self.master.masterq) + if r: + self.master.statusbar.message(r) + self.master.refresh_connection(self.flow) + elif key == "R": + self.state.revert(self.flow) + self.master.refresh_connection(self.flow) + elif key == "v": + if self.viewing == self.REQ: + conn = self.flow.request + else: + conn = self.flow.response + if conn.content: + t = conn.headers.get("content-type", [None]) + t = t[0] + if t: + ext = mimetypes.guess_extension(t) or "" + else: + ext = "" + fd, name = tempfile.mkstemp(ext, "mproxy") + os.write(fd, conn.content) + os.close(fd) + t = conn.headers.get("content-type", [None]) + t = t[0] + + cmd = None + shell = False + + if t: + c = mailcap.getcaps() + cmd, _ = mailcap.findmatch(c, t, filename=name) + if cmd: + shell = True + if not cmd: + c = os.environ.get("PAGER") or os.environ.get("EDITOR") + cmd = [c, name] + ret = subprocess.call(cmd, shell=shell) + # Not sure why, unless we do this we get a visible cursor after + # spawning 'less'. 
class StatusBar(urwid.WidgetWrap):
    """
    Two-row footer: an info bar (title plus flow count) stacked above an
    ActionBar used for status messages and prompts.
    """
    def __init__(self, master, text):
        self.master, self.text = master, text
        self.ab = ActionBar()
        self.ib = urwid.AttrWrap(urwid.Text(""), 'foot')
        self.w = urwid.Pile([self.ib, self.ab])
        self.redraw()

    def redraw(self):
        # Left column: proxy title with the listening port.
        left = urwid.Text([('title', "mproxy:%s"%self.master.server.port)])
        # Right column: current footer text plus the total flow count.
        count = "[%s]"%len(self.master.state.flow_list)
        right = urwid.Text(
            [self.text, ('text', "%5s"%count)],
            align="right"
        )
        self.ib.set_w(urwid.Columns([left, right]))

    def update(self, text):
        self.text = text
        self.redraw()

    def selectable(self):
        # Must be selectable so the prompt Edit widget can take focus.
        return True

    def get_edit_text(self):
        return self.ab.w.get_edit_text()

    def prompt(self, prompt):
        self.ab.prompt(prompt)

    def message(self, msg):
        self.ab.message(msg)
    def get_text(self, nofocus=False, padding=3):
        """
        Render this flow as urwid text markup for the connection list and
        the connection view header. nofocus suppresses the focus marker;
        padding controls the width of the leading indent.
        """
        if not self.request and not self.response:
            # Nothing received yet beyond the client connecting.
            txt = [
                ("title", " Connection from %s..."%(self.connection.address)),
            ]
        else:
            # First line: method and URL. "!" flags an unacked intercept.
            txt = [
                ("ack", "!") if self.intercepting and not self.request.acked else " ",
                ("method", self.request.method),
                " ",
                (
                    "text" if (self.response or self.error) else "title",
                    self.request.url(),
                ),
            ]
            if self.response or self.error or self.is_replay():
                # Second line, indented under the URL.
                txt.append("\n" + " "*(padding+2))
                if self.is_replay():
                    txt.append(("method", "[replay] "))
                if not (self.response or self.error):
                    txt.append(("text", "waiting for response..."))

            if self.response:
                txt.append(
                    ("ack", "!") if self.intercepting and not self.response.acked else " "
                )
                txt.append("-> ")
                # 200 and 304 render as "good" codes, anything else as an
                # error.
                if self.response.code in [200, 304]:
                    txt.append(("goodcode", str(self.response.code)))
                else:
                    txt.append(("error", str(self.response.code)))
                t = self.response.headers.get("content-type")
                if t:
                    # Strip any ";charset=..." parameters.
                    t = t[0].split(";")[0]
                    txt.append(("text", " %s"%t))
                if self.response.content:
                    txt.append(", %s"%utils.pretty_size(len(self.response.content)))
            elif self.error:
                txt.append(
                    ("error", self.error.msg)
                )
        # Prefix every rendering with the focus marker or plain padding.
        if self.focus and not nofocus:
            txt.insert(0, ("focus", ">>" + " "*(padding-2)))
        else:
            txt.insert(0, " "*padding)
        return txt
self.flow_list = [] + self.focus = None + # These are compiled filt expressions: + self.limit = None + self.intercept = None + + def add_browserconnect(self, f): + self.flow_list.insert(0, f) + self.flow_map[f.connection] = f + if self.focus is None: + self.set_focus(0) + else: + self.set_focus(self.focus + 1) + + def add_request(self, req): + f = self.flow_map.get(req.connection) + if not f: + return False + f.request = req + return f + + def add_response(self, resp): + f = self.flow_map.get(resp.request.connection) + if not f: + return False + f.response = resp + f.waiting = False + f.backup() + return f + + def add_error(self, err): + f = self.flow_map.get(err.connection) + if not f: + return False + f.error = err + f.waiting = False + f.backup() + return f + + @property + def view(self): + if self.limit: + return [i for i in self.flow_list if i.match(self.limit)] + else: + return self.flow_list[:] + + def set_limit(self, limit): + """ + Limit is a compiled filter expression, or None. + """ + self.limit = limit + self.set_focus(self.focus) + + def get_connection(self, itm): + if isinstance(itm, (proxy.BrowserConnection, ReplayConnection)): + return itm + elif hasattr(itm, "connection"): + return itm.connection + elif hasattr(itm, "request"): + return itm.request.connection + + def lookup(self, itm): + """ + Checks for matching connection, using a Flow, Replay Connection, + BrowserConnection, Request, Response or Error object. Returns None + if not found. 
+ """ + connection = self.get_connection(itm) + return self.flow_map.get(connection) + + def get_focus(self): + if not self.view: + return None, None + return self.view[self.focus], self.focus + + def set_focus(self, idx): + if self.view: + for i in self.view: + i.focus = False + if idx >= len(self.view): + idx = len(self.view) - 1 + elif idx < 0: + idx = 0 + self.view[idx].focus = True + self.focus = idx + + def get_from_pos(self, pos): + if len(self.view) <= pos or pos < 0: + return None, None + return self.view[pos], pos + + def get_next(self, pos): + return self.get_from_pos(pos+1) + + def get_prev(self, pos): + return self.get_from_pos(pos-1) + + def delete_flow(self, f): + if not f.intercepting: + c = self.get_connection(f) + self.view[self.focus].focus = False + del self.flow_map[c] + self.flow_list.remove(f) + self.set_focus(self.focus) + return True + return False + + def clear(self): + for i in self.flow_list[:]: + self.delete_flow(i) + + def kill_flow(self, f): + f.kill() + self.delete_flow(f) + + def revert(self, f): + """ + Replaces the matching connection object with a ReplayConnection object. + """ + conn = self.get_connection(f) + del self.flow_map[conn] + f.revert() + self.flow_map[f.connection] = f + + def replay(self, f, masterq): + """ + Replaces the matching connection object with a ReplayConnection object. + + Returns None if successful, or error message if not. + """ + #begin nocover + if f.intercepting: + return "Can't replay while intercepting..." 
    def __init__(self, server, config):
        """
        server: the proxy server instance events are read from.
        config: runtime configuration object (presumably a proxy.Config;
        confirm at the call site in the mitmproxy script).
        """
        controller.Master.__init__(self, server)
        self.config = config
        self.state = State()

        # Compiled sticky-cookie filter (or None), and a map of
        # (host, port) -> cookie header values captured from traffic.
        self.stickycookie = None
        self.stickyhosts = {}
+ self.nested = False + + def make_view(self): + self.view = urwid.Frame( + self.body, + header = self.header, + footer = self.statusbar + ) + self.view.set_focus("body") + + def view_connlist(self): + self.body = urwid.ListBox(self.conn_list_view) + self.statusbar = StatusBar(self, self.footer_text_default) + self.header = None + self.nested = False + self.make_view() + + def view_connection(self, flow): + self.statusbar = StatusBar(self, self.footer_text_connview) + self.body = ConnectionView(self, self.state, flow) + self.header = ConnectionViewHeader(flow) + self.nested = True + self.make_view() + + def helptext(self): + text = [] + text.extend([("head", "Global keys:\n")]) + keys = [ + ("a", "accept intercepted request or response"), + ("i", "set interception pattern"), + ("j, k", "up, down"), + ("l", "set limit filter pattern"), + ("q", "quit / return to connection list"), + ("r", "replay request"), + ("s", "set sticky cookie expression"), + ("R", "revert changes to request"), + ("page up/down", "page up/down"), + ("space", "page down"), + ("enter", "view connection"), + ] + text.extend(format_keyvals(keys, key="key", val="text", indent=4)) + + text.extend([("head", "\n\nConnection list keys:\n")]) + keys = [ + ("C", "clear connection list"), + ("d", "delete connection from view"), + ("z", "kill and delete connection, even if it's mid-intercept"), + ] + text.extend(format_keyvals(keys, key="key", val="text", indent=4)) + + text.extend([("head", "\n\nConnection view keys:\n")]) + keys = [ + ("b", "toggle hexdump view"), + ("e", "edit response/request"), + ("v", "view contents in external viewer"), + ("tab", "toggle response/request view"), + ] + text.extend(format_keyvals(keys, key="key", val="text", indent=4)) + + text.extend([("head", "\n\nFilter expressions:\n")]) + f = [] + for i in filt.filt_unary: + f.append( + ("~%s"%i.code, i.help) + ) + for i in filt.filt_rex: + f.append( + ("~%s regex"%i.code, i.help) + ) + for i in filt.filt_int: + f.append( + ("~%s 
int"%i.code, i.help) + ) + f.sort() + f.extend( + [ + ("!", "unary not"), + ("&", "and"), + ("|", "or"), + ("(...)", "grouping"), + ] + ) + text.extend(format_keyvals(f, key="key", val="text", indent=4)) + + text.extend( + [ + "\n", + ("text", " Regexes are Python-style.\n"), + ("text", " Regexes can be specified as quoted strings.\n"), + ("text", " Header matching (~h, ~hq, ~hs) is against a string of the form \"name: value\".\n"), + ("text", " Expressions with no operators are regex matches against URL.\n"), + ("text", " Default binary operator is &.\n"), + ("head", "\n Examples:\n"), + ] + ) + examples = [ + ("google\.com", "Url containing \"google.com"), + ("~r ~b test", "Requests where body contains \"test\""), + ("!(~r & ~t \"text/html\")", "Anything but requests with a text/html content type."), + ] + text.extend(format_keyvals(examples, key="key", val="text", indent=4)) + return urwid.ListBox([urwid.Text(text)]) + + def view_help(self): + self.body = self.helptext() + self.header = None + self.nested = True + self.make_view() + + def prompt(self, prompt, callback): + self.statusbar.prompt(prompt) + self.view.set_focus("footer") + self.prompting = callback + + def prompt_onekey(self, prompt, keys, callback): + """ + Keys are a set of (word, key) tuples. The appropriate key in the + word is highlighted. + """ + prompt = [prompt, "("] + mkup = [] + for i, e in enumerate(keys): + parts = e[0].split(e[1], 1) + if parts[0]: + mkup.append(("text", parts[0])) + mkup.append(("key", e[1])) + if parts[1]: + mkup.append(("text", parts[1])) + if i < len(keys)-1: + mkup.append(",") + prompt.extend(mkup) + prompt.append(")? 
") + self.onekey = "".join([i[1] for i in keys]) + self.prompt(prompt, callback) + + def prompt_done(self): + self.prompting = False + self.onekey = False + self.view.set_focus("body") + self.statusbar.message("") + + def prompt_execute(self, txt=None): + if not txt: + txt = self.statusbar.get_edit_text() + p = self.prompting + self.prompt_done() + msg = p(txt) + if msg: + self.statusbar.message(msg) + + def prompt_cancel(self): + self.prompt_done() + + def search(self, txt): + pass + + def set_limit(self, txt): + if txt: + f = filt.parse(txt) + if not f: + return "Invalid filter expression." + self.state.set_limit(f) + else: + self.state.set_limit(None) + self.sync_list_view() + + def set_intercept(self, txt): + if txt: + self.state.intercept = filt.parse(txt) + if not self.state.intercept: + return "Invalid filter expression." + else: + self.state.intercept = None + self.sync_list_view() + + def set_stickycookie(self, txt): + if txt: + self.stickycookie = filt.parse(txt) + if not self.stickycookie: + return "Invalid filter expression." 
    def loop(self):
        """
        Main UI event loop: starts the proxy slave thread, then repeatedly
        redraws the screen, pumps proxy events from the master queue, and
        dispatches keyboard input until Stop is raised or the user
        interrupts.
        """
        q = Queue.Queue()
        self.masterq = q
        slave = controller.Slave(q, self.server)
        slave.start()
        try:
            while not self._shutdown:
                size = self.drawscreen()
                self.statusbar.redraw()
                # Process any pending proxy events.
                self.tick(q)
                keys = self.ui.get_input()
                for k in keys:
                    if self.prompting:
                        # A prompt is active: keys feed the prompt instead
                        # of the normal bindings.
                        if k == "esc":
                            self.prompt_cancel()
                            k = None
                        elif self.onekey:
                            # Single-key prompt: any key in the allowed
                            # set answers immediately.
                            if k == "enter":
                                self.prompt_cancel()
                            elif k in self.onekey:
                                self.prompt_execute(k)
                            k = None
                        elif k == "enter":
                            self.prompt_execute()
                            k = None
                    else:
                        self.statusbar.message("")
                        if k == "?":
                            self.view_help()
                        elif k == "l":
                            self.prompt("Limit: ", self.set_limit)
                            k = None
                        elif k == "i":
                            self.prompt("Intercept: ", self.set_intercept)
                            k = None
                        elif k == "C":
                            self.clear_connections()
                        elif k == "j":
                            k = "down"
                        elif k == "k":
                            k = "up"
                        elif k == " ":
                            k = "page down"
                        elif k in ('q','Q'):
                            # q pops back to the list when nested, quits
                            # otherwise.
                            if self.nested:
                                self.view_connlist()
                            else:
                                raise Stop
                        elif k == "s":
                            self.prompt("Sticky cookie: ", self.set_stickycookie)
                            k = None
                    if k:
                        # Anything unhandled goes to the focused widget.
                        self.view.keypress(size, k)
        except (Stop, KeyboardInterrupt):
            pass
        self.shutdown()
+ self.statusbar.refresh_connection(c) + + # Handlers + def handle_browserconnection(self, r): + f = Flow(r) + self.state.add_browserconnect(f) + r.ack() + self.sync_list_view() + + def handle_error(self, r): + f = self.state.add_error(r) + if not f: + r.ack() + else: + self.sync_list_view() + self.refresh_connection(f) + + def handle_request(self, r): + f = self.state.add_request(r) + if not f: + r.ack() + else: + if f.match(self.stickycookie): + hid = (f.request.host, f.request.port) + if f.request.headers.has_key("cookie"): + self.stickyhosts[hid] = f.request.headers["cookie"] + elif hid in self.stickyhosts: + f.request.headers["cookie"] = self.stickyhosts[hid] + + if f.match(self.state.intercept): + f.intercept() + else: + r.ack() + self.sync_list_view() + self.refresh_connection(f) + + def handle_response(self, r): + f = self.state.add_response(r) + if not f: + r.ack() + else: + if f.match(self.stickycookie): + hid = (f.request.host, f.request.port) + if f.response.headers.has_key("set-cookie"): + self.stickyhosts[hid] = f.response.headers["set-cookie"] + + if f.match(self.state.intercept): + f.intercept() + else: + r.ack() + self.sync_list_view() + self.refresh_connection(f) diff --git a/libmproxy/controller.py b/libmproxy/controller.py new file mode 100644 index 000000000..4955aae3f --- /dev/null +++ b/libmproxy/controller.py @@ -0,0 +1,119 @@ + +# Copyright (C) 2010 Aldo Cortesi +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
class Master:
    """
    Base event loop for the proxy: pulls messages off the slave queue and
    dispatches each to a handle_* method named after the message class,
    acking unhandled messages so senders aren't blocked.
    """
    def __init__(self, server):
        self.server = server
        self._shutdown = False
        self.masterq = None

    def tick(self, q):
        try:
            # Small timeout so the loop doesn't peg the CPU while idle.
            self.handle(q.get(timeout=0.01))
        except Queue.Empty:
            pass

    def run(self):
        eventq = Queue.Queue()
        self.masterq = eventq
        worker = Slave(eventq, self.server)
        worker.start()
        while not self._shutdown:
            self.tick(eventq)
        self.shutdown()

    def handle(self, msg):
        # Dispatch by message class name, e.g. Request -> handle_request.
        handler = getattr(self, "handle_" + msg.__class__.__name__.lower(), None)
        if handler:
            handler(msg)
        else:
            msg.ack()

    def shutdown(self):
        if self._shutdown:
            return
        self._shutdown = True
        self.server.shutdown()
+ """ + def __init__(self, server, verbosity): + self.verbosity = verbosity + Master.__init__(self, server) + + def run(self): + try: + return Master.run(self) + except KeyboardInterrupt: + self.shutdown() + + def handle_response(self, msg): + if 0 < self.verbosity < 3: + print >> sys.stderr, ">>", + print >> sys.stderr, msg.request.short() + if self.verbosity == 1: + print >> sys.stderr, "<<", + print >> sys.stderr, msg.short() + elif self.verbosity == 2: + print >> sys.stderr, "<<" + for i in msg.assemble().splitlines(): + print >> sys.stderr, "\t", i + print >> sys.stderr, "<<" + elif self.verbosity == 3: + print >> sys.stderr, ">>" + for i in msg.request.assemble().splitlines(): + print >> sys.stderr, "\t", i + print >> sys.stderr, ">>" + print >> sys.stderr, "<<" + for i in msg.assemble().splitlines(): + print >> sys.stderr, "\t", i + print >> sys.stderr, "<<" + msg.ack() diff --git a/libmproxy/filt.py b/libmproxy/filt.py new file mode 100644 index 000000000..49fff6c70 --- /dev/null +++ b/libmproxy/filt.py @@ -0,0 +1,316 @@ + +# Copyright (C) 2010 Aldo Cortesi +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +""" + The following operators are understood: + + ~q Request + ~s Response + + Headers: + + Patterns are matched against "name: value" strings. Field names are + all-lowercase. 
class FResponseContentType(_Rex):
    code = "ts"
    # Fixed: the help text previously read "Request Content-Type header",
    # copied from FRequestContentType; this matcher applies to responses.
    help = "Response Content-Type header"
    def __call__(self, o):
        """
        True if o is a response whose content-type header matches the
        regex; always False for request objects.
        """
        if o.is_response():
            return _check_content_type(self.expr, o)
        else:
            return False
class FBodRequest(_Rex):
    """
    Matches the regex against the request body; works when handed either
    a request or a response object.
    """
    code = "bq"
    help = "Request body"
    def __call__(self, o):
        # Pick out the request body regardless of which side o is.
        if o.is_response():
            content = o.request.content
        else:
            content = o.content
        if content and re.search(self.expr, content):
            return True
        return False
class FAnd(_Token):
    """
    Conjunction node: matches only when every sub-filter matches.
    """
    def __init__(self, lst):
        self.lst = lst

    def dump(self, indent=0, fp=sys.stdout):
        print >> fp, "\t"*indent, self.__class__.__name__
        for child in self.lst:
            child.dump(indent+1, fp)

    def __call__(self, o):
        # Evaluate every child (no short-circuit), then combine.
        results = [child(o) for child in self.lst]
        return all(results)
def parse(s):
    """
    Parse the filter expression s and return the root filter object, or
    None when the expression is invalid.
    """
    try:
        result = bnf.parseString(s, parseAll=True)
    except pp.ParseException:
        return None
    return result[0]
def try_del(d, key):
    """
    Delete key from the mapping d, silently ignoring missing keys.

    The parameter was renamed from "dict", which shadowed the builtin.
    Callers pass it positionally, so the rename is backward-compatible.
    """
    try:
        del d[key]
    except KeyError:
        pass
+ """ + try: + method, url, protocol = string.split(request) + except ValueError: + raise ProxyError(400, "Can't parse request") + if method in ['GET', 'HEAD', 'POST']: + if url.startswith("/"): + scheme, port, host, path = None, None, None, url + else: + parts = parse_url(url) + if not parts: + raise ProxyError(400, "Invalid url: %s"%url) + scheme, host, port, path = parts + elif method == 'CONNECT': + scheme = None + path = None + host, port = url.split(":") + port = int(port) + else: + raise ProxyError(501, "Unknown request method: %s" % method) + return method, scheme, host, port, path + + +class Request(controller.Msg): + FMT = '%s %s HTTP/1.0\r\n%s\r\n%s' + def __init__(self, connection, host, port, scheme, method, path, headers, content): + self.connection = connection + self.host, self.port, self.scheme = host, port, scheme + self.method, self.path, self.headers, self.content = method, path, headers, content + self.kill = False + controller.Msg.__init__(self) + + def copy(self): + c = copy.copy(self) + c.headers = self.headers.copy() + return c + + def url(self): + if (self.port, self.scheme) in [(80, "http"), (443, "https")]: + host = self.host + else: + host = "%s:%s"%(self.host, self.port) + return "%s://%s%s"%(self.scheme, host, self.path) + + def set_url(self, url): + parts = parse_url(url) + if not parts: + return False + self.scheme, self.host, self.port, self.path = parts + return True + + def is_response(self): + return False + + def short(self): + return "%s %s"%(self.method, self.url()) + + def assemble(self): + """ + Assembles the request for transmission to the server. We make some + modifications to make sure interception works properly. 
+ """ + headers = self.headers.copy() + try_del(headers, 'accept-encoding') + try_del(headers, 'proxy-connection') + try_del(headers, 'keep-alive') + try_del(headers, 'connection') + headers["connection"] = ["close"] + data = (self.method, self.path, str(headers), self.content) + return self.FMT%data + + +class Response(controller.Msg): + FMT = '%s\r\n%s\r\n%s' + def __init__(self, request, code, proto, msg, headers, content): + self.request = request + self.code, self.proto, self.msg = code, proto, msg + self.headers, self.content = headers, content + self.kill = False + controller.Msg.__init__(self) + + def copy(self): + c = copy.copy(self) + c.headers = self.headers.copy() + return c + + def is_response(self): + return True + + def short(self): + return "%s %s"%(self.code, self.proto) + + def assemble(self): + """ + Assembles the response for transmission to the client. We make some + modifications to make sure interception works properly. + """ + headers = self.headers.copy() + try_del(headers, 'accept-encoding') + try_del(headers, 'proxy-connection') + try_del(headers, 'connection') + try_del(headers, 'keep-alive') + headers["connection"] = ["close"] + proto = "%s %s %s"%(self.proto, self.code, self.msg) + data = (proto, str(headers), self.content) + return self.FMT%data + + +class BrowserConnection(controller.Msg): + def __init__(self, address, port): + self.address, self.port = address, port + controller.Msg.__init__(self) + + def copy(self): + return copy.copy(self) + + +class Error(controller.Msg): + def __init__(self, connection, msg): + self.connection, self.msg = connection, msg + controller.Msg.__init__(self) + + def copy(self): + return copy.copy(self) + + +class FileLike: + def __init__(self, o): + self.o = o + + def __getattr__(self, attr): + return getattr(self.o, attr) + + def flush(self): + pass + + def readline(self): + result = '' + while True: + ch = self.read(1) + if not ch: + break + else: + result += ch + if ch == '\n': + break + return 
result + + +class ServerConnection: + def __init__(self, request): + self.request = request + self.server, self.rfile, self.wfile = None, None, None + self.connect() + self.send_request() + + def connect(self): + try: + addr = socket.gethostbyname(self.request.host) + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + if self.request.scheme == "https": + server = ssl.wrap_socket(server) + server.connect((addr, self.request.port)) + except socket.error, err: + raise ProxyError(200, 'Error connecting to "%s": %s' % (self.request.host, err)) + self.server = server + self.rfile, self.wfile = server.makefile('rb'), server.makefile('wb') + + def send_request(self): + try: + self.wfile.write(self.request.assemble()) + self.wfile.flush() + except socket.error, err: + raise ProxyError(500, 'Error sending data to "%s": %s' % (request.host, err)) + + def read_response(self): + proto = self.rfile.readline() + parts = proto.strip().split(" ", 2) + if not len(parts) == 3: + raise ProxyError(200, "Invalid server response.") + proto, code, msg = parts + code = int(code) + headers = utils.Headers() + headers.read(self.rfile) + if headers.has_key("content-length"): + content = self.rfile.read(int(headers["content-length"][0])) + else: + content = self.rfile.read() + return Response(self.request, code, proto, msg, headers, content) + + def terminate(self): + try: + if not self.wfile.closed: + self.wfile.flush() + self.server.close() + except IOError: + pass + + +class ProxyHandler(SocketServer.StreamRequestHandler): + def __init__(self, request, client_address, server, q): + self.mqueue = q + SocketServer.StreamRequestHandler.__init__(self, request, client_address, server) + + def handle(self): + server = None + bc = BrowserConnection(*self.client_address) + bc.send(self.mqueue) + try: + request = self.read_request(bc) + request = request.send(self.mqueue) + if request.kill: + self.finish() + return + server = ServerConnection(request) + response = server.read_response() + 
    def read_request(self, connection):
        """
            Read one complete request from the client and return a Request.

            For CONNECT requests: reply 200 to the client, wrap the client
            connection in SSL using the configured certificate, then read the
            real (decrypted) request line from inside the tunnel.
        """
        request = self.rfile.readline()
        method, scheme, host, port, path = parse_proxy_request(request)
        if not host:
            raise ProxyError(200, 'Invalid request: %s'%request)
        if method == "CONNECT":
            # Discard additional headers sent to the proxy. Should I expose
            # these to users?
            while 1:
                d = self.rfile.readline()
                if not d.strip():
                    break
            # Tell the client the tunnel is up, then switch this socket to
            # server-side SSL so the embedded request can be read in clear.
            self.wfile.write('HTTP/1.1 200 Connection established\r\n')
            self.wfile.write('Proxy-agent: %s\r\n'%NAME)
            self.wfile.write('\r\n')
            self.wfile.flush()
            self.connection = ssl.wrap_socket(
                self.connection,
                certfile = config.pemfile,
                keyfile = config.pemfile,
                server_side = True,
                ssl_version = ssl.PROTOCOL_SSLv23,
                do_handshake_on_connect = False
            )
            # FileLike adds readline()/flush() on top of the raw SSL socket.
            self.rfile = FileLike(self.connection)
            self.wfile = FileLike(self.connection)
            # Host and port stay as parsed from the CONNECT line; the inner
            # request line only contributes method and path.
            method, _, _, _, path = parse_proxy_request(self.rfile.readline())
            scheme = "https"
        headers = utils.Headers()
        headers.read(self.rfile)
        if method == 'POST' and not headers.has_key('content-length'):
            raise ProxyError(400, "Missing Content-Length for POST method")
        if headers.has_key("content-length"):
            content = self.rfile.read(int(headers["content-length"][0]))
        else:
            content = ""
        return Request(connection, host, port, scheme, method, path, headers, content)
ServerBase = SocketServer.ThreadingTCPServer
class ProxyServer(ServerBase):
    """
        A threading TCP proxy server. Each accepted connection is handled by
        a ProxyHandler, which is handed the master event queue so messages
        can flow back to the controller.
    """
    # Allow quick restarts without waiting for TIME_WAIT sockets to expire.
    allow_reuse_address = True
    def __init__(self, port):
        self.port = port
        ServerBase.__init__(self, ('', port), ProxyHandler)
        # Set later via set_mqueue(), before requests are served.
        self.masterq = None

    def set_mqueue(self, q):
        self.masterq = q

    def process_request(self, request, client_address):
        return ServerBase.process_request(self, request, client_address)

    def finish_request(self, request, client_address):
        # Unlike the stock implementation, pass the master queue through to
        # the handler's constructor.
        self.RequestHandlerClass(request, client_address, self, self.masterq)
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +#from __future__ import generators + +__doc__ = \ +""" +pyparsing module - Classes and methods to define and execute parsing grammars + +The pyparsing module is an alternative approach to creating and executing simple grammars, +vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you +don't need to learn a new syntax for defining grammars or matching expressions - the parsing module +provides a library of classes that you use to construct the grammar directly in Python. + +Here is a program to parse "Hello, World!" (or any greeting of the form ", !"):: + + from pyparsing import Word, alphas + + # define grammar of a greeting + greet = Word( alphas ) + "," + Word( alphas ) + "!" + + hello = "Hello, World!" + print hello, "->", greet.parseString( hello ) + +The program outputs the following:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + +The Python representation of the grammar is quite readable, owing to the self-explanatory +class names, and the use of '+', '|' and '^' operators. + +The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an +object with named attributes. + +The pyparsing module handles some of the problems that are typically vexing when writing text parsers: + - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) 
+ - quoted strings + - embedded comments +""" + +__version__ = "1.5.2" +__versionTime__ = "17 February 2009 19:45" +__author__ = "Paul McGuire " + +import string +from weakref import ref as wkref +import copy +import sys +import warnings +import re +import sre_constants +#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) + +__all__ = [ +'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', +'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', +'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', +'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', +'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', +'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', +'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', +'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', +'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', +'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', +'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', +'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', +'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', +'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', +'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', +'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', +'indentedBlock', 'originalTextFor', +] + + +""" +Detect if we are running version 3.X and make appropriate changes +Robert A. 
Clark +""" +if sys.version_info[0] > 2: + _PY3K = True + _MAX_INT = sys.maxsize + basestring = str +else: + _PY3K = False + _MAX_INT = sys.maxint + +if not _PY3K: + def _ustr(obj): + """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries + str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It + then < returns the unicode object | encodes it with the default encoding | ... >. + """ + if isinstance(obj,unicode): + return obj + + try: + # If this works, then _ustr(obj) has the same behaviour as str(obj), so + # it won't break any existing code. + return str(obj) + + except UnicodeEncodeError: + # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) + # state that "The return value must be a string object". However, does a + # unicode object (being a subclass of basestring) count as a "string + # object"? + # If so, then return a unicode object: + return unicode(obj) + # Else encode it... but how? There are many choices... :) + # Replace unprintables with escape codes? + #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') + # Replace unprintables with question marks? + #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') + # ... +else: + _ustr = str + unichr = chr + +if not _PY3K: + def _str2dict(strg): + return dict( [(c,0) for c in strg] ) +else: + _str2dict = set + +def _xml_escape(data): + """Escape &, <, >, ", ', etc. 
in a string of data.""" + + # ampersand must be replaced first + from_symbols = '&><"\'' + to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] + for from_,to_ in zip(from_symbols, to_symbols): + data = data.replace(from_, to_) + return data + +class _Constants(object): + pass + +if not _PY3K: + alphas = string.lowercase + string.uppercase +else: + alphas = string.ascii_lowercase + string.ascii_uppercase +nums = string.digits +hexnums = nums + "ABCDEFabcdef" +alphanums = alphas + nums +_bslash = chr(92) +printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) + +class ParseBaseException(Exception): + """base exception class for all parsing runtime exceptions""" + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( self, pstr, loc=0, msg=None, elem=None ): + self.loc = loc + if msg is None: + self.msg = pstr + self.pstr = "" + else: + self.msg = msg + self.pstr = pstr + self.parserElement = elem + + def __getattr__( self, aname ): + """supported attributes by name are: + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text + """ + if( aname == "lineno" ): + return lineno( self.loc, self.pstr ) + elif( aname in ("col", "column") ): + return col( self.loc, self.pstr ) + elif( aname == "line" ): + return line( self.loc, self.pstr ) + else: + raise AttributeError(aname) + + def __str__( self ): + return "%s (at char %d), (line:%d, col:%d)" % \ + ( self.msg, self.loc, self.lineno, self.column ) + def __repr__( self ): + return _ustr(self) + def markInputline( self, markerString = ">!<" ): + """Extracts the exception line from the input string, and marks + the location of the exception with a special symbol. 
+ """ + line_str = self.line + line_column = self.column - 1 + if markerString: + line_str = "".join( [line_str[:line_column], + markerString, line_str[line_column:]]) + return line_str.strip() + def __dir__(self): + return "loc msg pstr parserElement lineno col line " \ + "markInputLine __str__ __repr__".split() + +class ParseException(ParseBaseException): + """exception thrown when parse expressions don't match class; + supported attributes by name are: + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text + """ + pass + +class ParseFatalException(ParseBaseException): + """user-throwable exception thrown when inconsistent parse content + is found; stops all parsing immediately""" + pass + +class ParseSyntaxException(ParseFatalException): + """just like ParseFatalException, but thrown internally when an + ErrorStop indicates that parsing is to stop immediately because + an unbacktrackable syntax error has been found""" + def __init__(self, pe): + super(ParseSyntaxException, self).__init__( + pe.pstr, pe.loc, pe.msg, pe.parserElement) + +#~ class ReparseException(ParseBaseException): + #~ """Experimental class - parse actions can raise this exception to cause + #~ pyparsing to reparse the input string: + #~ - with a modified input string, and/or + #~ - with a modified start location + #~ Set the values of the ReparseException in the constructor, and raise the + #~ exception in a parse action to cause pyparsing to use the new string/location. + #~ Setting the values as None causes no change to be made. 
+ #~ """ + #~ def __init_( self, newstring, restartLoc ): + #~ self.newParseText = newstring + #~ self.reparseLoc = restartLoc + +class RecursiveGrammarException(Exception): + """exception thrown by validate() if the grammar could be improperly recursive""" + def __init__( self, parseElementList ): + self.parseElementTrace = parseElementList + + def __str__( self ): + return "RecursiveGrammarException: %s" % self.parseElementTrace + +class _ParseResultsWithOffset(object): + def __init__(self,p1,p2): + self.tup = (p1,p2) + def __getitem__(self,i): + return self.tup[i] + def __repr__(self): + return repr(self.tup) + def setOffset(self,i): + self.tup = (self.tup[0],i) + +class ParseResults(object): + """Structured parse results, to provide multiple means of access to the parsed data: + - as a list (len(results)) + - by list index (results[0], results[1], etc.) + - by attribute (results.) + """ + __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) + def __new__(cls, toklist, name=None, asList=True, modal=True ): + if isinstance(toklist, cls): + return toklist + retobj = object.__new__(cls) + retobj.__doinit = True + return retobj + + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( self, toklist, name=None, asList=True, modal=True ): + if self.__doinit: + self.__doinit = False + self.__name = None + self.__parent = None + self.__accumNames = {} + if isinstance(toklist, list): + self.__toklist = toklist[:] + else: + self.__toklist = [toklist] + self.__tokdict = dict() + + if name: + if not modal: + self.__accumNames[name] = 0 + if isinstance(name,int): + name = _ustr(name) # will always return a str, but use _ustr for consistency + self.__name = name + if not toklist in (None,'',[]): + if isinstance(toklist,basestring): + toklist = [ toklist ] + if asList: + if isinstance(toklist,ParseResults): + self[name] = 
    def __delitem__( self, i ):
        # Delete by int/slice (list-style) or by results name (dict-style).
        # List-style deletion must also shift the stored (value, position)
        # pairs in the token dictionary left past every removed index.
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        # (position > j) is 0/1: positions after the removed
                        # index move down by one.
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]
+ Will work with either numeric indices or dict-key indicies.""" + ret = self[index] + del self[index] + return ret + + def get(self, key, defaultValue=None): + """Returns named result matching the given key, or if there is no + such name, then returns the given defaultValue or None if no + defaultValue is specified.""" + if key in self: + return self[key] + else: + return defaultValue + + def insert( self, index, insStr ): + self.__toklist.insert(index, insStr) + # fixup indices in token dictionary + for name in self.__tokdict: + occurrences = self.__tokdict[name] + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) + + def items( self ): + """Returns all named result keys and values as a list of tuples.""" + return [(k,self[k]) for k in self.__tokdict] + + def values( self ): + """Returns all named result values.""" + return [ v[-1][0] for v in self.__tokdict.values() ] + + def __getattr__( self, name ): + if name not in self.__slots__: + if name in self.__tokdict: + if name not in self.__accumNames: + return self.__tokdict[name][-1][0] + else: + return ParseResults([ v[0] for v in self.__tokdict[name] ]) + else: + return "" + return None + + def __add__( self, other ): + ret = self.copy() + ret += other + return ret + + def __iadd__( self, other ): + if other.__tokdict: + offset = len(self.__toklist) + addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) + otheritems = other.__tokdict.items() + otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) + for (k,vlist) in otheritems for v in vlist] + for k,v in otherdictitems: + self[k] = v + if isinstance(v[0],ParseResults): + v[0].__parent = wkref(self) + + self.__toklist += other.__toklist + self.__accumNames.update( other.__accumNames ) + del other + return self + + def __repr__( self ): + return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) + + def __str__( self ): + out = "[" + sep = "" + for i 
in self.__toklist: + if isinstance(i, ParseResults): + out += sep + _ustr(i) + else: + out += sep + repr(i) + sep = ", " + out += "]" + return out + + def _asStringList( self, sep='' ): + out = [] + for item in self.__toklist: + if out and sep: + out.append(sep) + if isinstance( item, ParseResults ): + out += item._asStringList() + else: + out.append( _ustr(item) ) + return out + + def asList( self ): + """Returns the parse results as a nested list of matching tokens, all converted to strings.""" + out = [] + for res in self.__toklist: + if isinstance(res,ParseResults): + out.append( res.asList() ) + else: + out.append( res ) + return out + + def asDict( self ): + """Returns the named parse results as dictionary.""" + return dict( self.items() ) + + def copy( self ): + """Returns a new copy of a ParseResults object.""" + ret = ParseResults( self.__toklist ) + ret.__tokdict = self.__tokdict.copy() + ret.__parent = self.__parent + ret.__accumNames.update( self.__accumNames ) + ret.__name = self.__name + return ret + + def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): + """Returns the parse results as XML. 
    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            # Ask the enclosing ParseResults (held via weakref) which of its
            # named entries is this object.
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               self.__tokdict.values()[0][0][1] in (0,-1)):
            # Exactly one token and one name, whose stored offset (0 or -1)
            # marks it as covering the whole result: that name is ours.
            # NOTE(review): .values()[0]/.keys()[0] rely on Python 2 dict
            # methods returning lists.
            return self.__tokdict.keys()[0]
        else:
            return None
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}} for more information
    on parsing strings containing <TAB>s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # Reconstructed: the original return expression was destroyed in
    # extraction (text between "<" and ">" was eaten). A loc pointing
    # directly at a newline reports as column 1 (pyparsing's historical
    # behaviour); otherwise the column is the distance past the last
    # newline, 1-based because rfind returns -1 when there is none.
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    return loc - strg.rfind("\n", 0, loc)

def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.
    """
    return strg.count("\n",0,loc) + 1

def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    if nextCR > 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
None, None ) #custom debug actions + self.re = None + self.callPreparse = True # used to avoid redundant calls to preParse + self.callDuringTry = False + + def copy( self ): + """Make a copy of this ParserElement. Useful for defining different parse actions + for the same parsing pattern, using copies of the original parse element.""" + cpy = copy.copy( self ) + cpy.parseAction = self.parseAction[:] + cpy.ignoreExprs = self.ignoreExprs[:] + if self.copyDefaultWhiteChars: + cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS + return cpy + + def setName( self, name ): + """Define name for this expression, for use in debugging.""" + self.name = name + self.errmsg = "Expected " + self.name + if hasattr(self,"exception"): + self.exception.msg = self.errmsg + return self + + def setResultsName( self, name, listAllMatches=False ): + """Define name for referencing matching tokens as a nested attribute + of the returned parse results. + NOTE: this returns a *copy* of the original ParserElement object; + this is so that the client can define a basic element, such as an + integer, and reference it in multiple places with different names. + """ + newself = self.copy() + newself.resultsName = name + newself.modalResults = not listAllMatches + return newself + + def setBreak(self,breakFlag = True): + """Method to invoke the Python pdb debugger when this element is + about to be parsed. Set breakFlag to True to enable, False to + disable. 
+ """ + if breakFlag: + _parseMethod = self._parse + def breaker(instring, loc, doActions=True, callPreParse=True): + import pdb + pdb.set_trace() + return _parseMethod( instring, loc, doActions, callPreParse ) + breaker._originalParseMethod = _parseMethod + self._parse = breaker + else: + if hasattr(self._parse,"_originalParseMethod"): + self._parse = self._parse._originalParseMethod + return self + + def _normalizeParseActionArgs( f ): + """Internal method used to decorate parse actions that take fewer than 3 arguments, + so that all parse actions can be called as f(s,l,t).""" + STAR_ARGS = 4 + + try: + restore = None + if isinstance(f,type): + restore = f + f = f.__init__ + if not _PY3K: + codeObj = f.func_code + else: + codeObj = f.code + if codeObj.co_flags & STAR_ARGS: + return f + numargs = codeObj.co_argcount + if not _PY3K: + if hasattr(f,"im_self"): + numargs -= 1 + else: + if hasattr(f,"__self__"): + numargs -= 1 + if restore: + f = restore + except AttributeError: + try: + if not _PY3K: + call_im_func_code = f.__call__.im_func.func_code + else: + call_im_func_code = f.__code__ + + # not a function, must be a callable object, get info from the + # im_func binding of its bound __call__ method + if call_im_func_code.co_flags & STAR_ARGS: + return f + numargs = call_im_func_code.co_argcount + if not _PY3K: + if hasattr(f.__call__,"im_self"): + numargs -= 1 + else: + if hasattr(f.__call__,"__self__"): + numargs -= 0 + except AttributeError: + if not _PY3K: + call_func_code = f.__call__.func_code + else: + call_func_code = f.__call__.__code__ + # not a bound method, get info directly from __call__ method + if call_func_code.co_flags & STAR_ARGS: + return f + numargs = call_func_code.co_argcount + if not _PY3K: + if hasattr(f.__call__,"im_self"): + numargs -= 1 + else: + if hasattr(f.__call__,"__self__"): + numargs -= 1 + + + #~ print ("adding function %s with %d args" % (f.func_name,numargs)) + if numargs == 3: + return f + else: + if numargs > 3: + def 
tmp(s,l,t): + return f(f.__call__.__self__, s,l,t) + if numargs == 2: + def tmp(s,l,t): + return f(l,t) + elif numargs == 1: + def tmp(s,l,t): + return f(t) + else: #~ numargs == 0: + def tmp(s,l,t): + return f() + try: + tmp.__name__ = f.__name__ + except (AttributeError,TypeError): + # no need for special handling if attribute doesnt exist + pass + try: + tmp.__doc__ = f.__doc__ + except (AttributeError,TypeError): + # no need for special handling if attribute doesnt exist + pass + try: + tmp.__dict__.update(f.__dict__) + except (AttributeError,TypeError): + # no need for special handling if attribute doesnt exist + pass + return tmp + _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs) + + def setParseAction( self, *fns, **kwargs ): + """Define action to perform when successfully matching parse element definition. + Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), + fn(loc,toks), fn(toks), or just fn(), where: + - s = the original string being parsed (see note below) + - loc = the location of the matching substring + - toks = a list of the matched tokens, packaged as a ParseResults object + If the functions in fns modify the tokens, they can return them as the return + value from fn, and the modified list of tokens will replace the original. + Otherwise, fn does not need to return any value. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{parseString}} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) + self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) + return self + + def addParseAction( self, *fns, **kwargs ): + """Add parse action to expression's list of parse actions. 
See L{I{setParseAction}}.""" + self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) + self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) + return self + + def setFailAction( self, fn ): + """Define action to perform if parsing fails at this expression. + Fail acton fn is a callable function that takes the arguments + fn(s,loc,expr,err) where: + - s = string being parsed + - loc = location where expression match was attempted and failed + - expr = the parse expression that failed + - err = the exception thrown + The function returns no value. It may throw ParseFatalException + if it is desired to stop parsing immediately.""" + self.failAction = fn + return self + + def _skipIgnorables( self, instring, loc ): + exprsFound = True + while exprsFound: + exprsFound = False + for e in self.ignoreExprs: + try: + while 1: + loc,dummy = e._parse( instring, loc ) + exprsFound = True + except ParseException: + pass + return loc + + def preParse( self, instring, loc ): + if self.ignoreExprs: + loc = self._skipIgnorables( instring, loc ) + + if self.skipWhitespace: + wt = self.whiteChars + instrlen = len(instring) + while loc < instrlen and instring[loc] in wt: + loc += 1 + + return loc + + def parseImpl( self, instring, loc, doActions=True ): + return loc, [] + + def postParse( self, instring, loc, tokenlist ): + return tokenlist + + #~ @profile + def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): + debugging = ( self.debug ) #and doActions ) + + if debugging or self.failAction: + #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) + if (self.debugActions[0] ): + self.debugActions[0]( instring, loc, self ) + if callPreParse and self.callPreparse: + preloc = self.preParse( instring, loc ) + else: + preloc = loc + tokensStart = loc + try: + try: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + except IndexError: + raise 
ParseException( instring, len(instring), self.errmsg, self ) + except ParseBaseException, err: + #~ print ("Exception raised:", err) + if self.debugActions[2]: + self.debugActions[2]( instring, tokensStart, self, err ) + if self.failAction: + self.failAction( instring, tokensStart, self, err ) + raise + else: + if callPreParse and self.callPreparse: + preloc = self.preParse( instring, loc ) + else: + preloc = loc + tokensStart = loc + if self.mayIndexError or loc >= len(instring): + try: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + except IndexError: + raise ParseException( instring, len(instring), self.errmsg, self ) + else: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + + tokens = self.postParse( instring, loc, tokens ) + + retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) + if self.parseAction and (doActions or self.callDuringTry): + if debugging: + try: + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) + except ParseBaseException, err: + #~ print "Exception raised in user parse action:", err + if (self.debugActions[2] ): + self.debugActions[2]( instring, tokensStart, self, err ) + raise + else: + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) + + if debugging: + #~ print ("Matched",self,"->",retTokens.asList()) + if (self.debugActions[1] ): + self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) + + return loc, retTokens + + def tryParse( self, instring, loc ): + try: + return self._parse( instring, loc, doActions=False )[0] + except ParseFatalException: 
+ raise ParseException( instring, loc, self.errmsg, self) + + # this method gets repeatedly called during backtracking with the same arguments - + # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression + def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): + lookup = (self,instring,loc,callPreParse,doActions) + if lookup in ParserElement._exprArgCache: + value = ParserElement._exprArgCache[ lookup ] + if isinstance(value,Exception): + raise value + return value + else: + try: + value = self._parseNoCache( instring, loc, doActions, callPreParse ) + ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) + return value + except ParseBaseException, pe: + ParserElement._exprArgCache[ lookup ] = pe + raise + + _parse = _parseNoCache + + # argument cache for optimizing repeated calls when backtracking through recursive expressions + _exprArgCache = {} + def resetCache(): + ParserElement._exprArgCache.clear() + resetCache = staticmethod(resetCache) + + _packratEnabled = False + def enablePackrat(): + """Enables "packrat" parsing, which adds memoizing to the parsing logic. + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, + instead of re-executing parsing/validating code. Memoizing is done of + both valid results and parsing exceptions. + + This speedup may break existing programs that use parse actions that + have side-effects. For this reason, packrat parsing is disabled when + you first import pyparsing. To activate the packrat feature, your + program must call the class method ParserElement.enablePackrat(). If + your program uses psyco to "compile as you go", you must call + enablePackrat before calling psyco.full(). If you do not do this, + Python will crash. For best results, call enablePackrat() immediately + after importing pyparsing. 
+ """ + if not ParserElement._packratEnabled: + ParserElement._packratEnabled = True + ParserElement._parse = ParserElement._parseCache + enablePackrat = staticmethod(enablePackrat) + + def parseString( self, instring, parseAll=False ): + """Execute the parse expression with the given string. + This is the main interface to the client code, once the complete + expression has been built. + + If you want the grammar to require that the entire input string be + successfully parsed, then set parseAll to True (equivalent to ending + the grammar with StringEnd()). + + Note: parseString implicitly calls expandtabs() on the input string, + in order to report proper column numbers in parse actions. + If the input string contains tabs and + the grammar uses parse actions that use the loc argument to index into the + string being parsed, you can ensure you have a consistent view of the input + string by: + - calling parseWithTabs on your grammar before calling parseString + (see L{I{parseWithTabs}}) + - define your parse action using the full (s,loc,toks) signature, and + reference the input string using the parse action's s argument + - explictly expand the tabs in your input string before calling + parseString + """ + ParserElement.resetCache() + if not self.streamlined: + self.streamline() + #~ self.saveAsList = True + for e in self.ignoreExprs: + e.streamline() + if not self.keepTabs: + instring = instring.expandtabs() + try: + loc, tokens = self._parse( instring, 0 ) + if parseAll: + loc = self.preParse( instring, loc ) + StringEnd()._parse( instring, loc ) + except ParseBaseException, exc: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc + else: + return tokens + + def scanString( self, instring, maxMatches=_MAX_INT ): + """Scan the input string for expression matches. Each match will return the + matching tokens, start location, and end location. 
May be called with optional + maxMatches argument, to clip scanning after 'n' matches are found. + + Note that the start and end locations are reported relative to the string + being parsed. See L{I{parseString}} for more information on parsing + strings with embedded tabs.""" + if not self.streamlined: + self.streamline() + for e in self.ignoreExprs: + e.streamline() + + if not self.keepTabs: + instring = _ustr(instring).expandtabs() + instrlen = len(instring) + loc = 0 + preparseFn = self.preParse + parseFn = self._parse + ParserElement.resetCache() + matches = 0 + try: + while loc <= instrlen and matches < maxMatches: + try: + preloc = preparseFn( instring, loc ) + nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) + except ParseException: + loc = preloc+1 + else: + matches += 1 + yield tokens, preloc, nextLoc + loc = nextLoc + except ParseBaseException, pe: + raise pe + + def transformString( self, instring ): + """Extension to scanString, to modify matching text with modified tokens that may + be returned from a parse action. To use transformString, define a grammar and + attach a parse action to it that modifies the returned token list. + Invoking transformString() on a target string will then scan for matches, + and replace the matched text patterns according to the logic in the parse + action. 
transformString() returns the resulting transformed string.""" + out = [] + lastE = 0 + # force preservation of s, to minimize unwanted transformation of string, and to + # keep string locs straight between transformString and scanString + self.keepTabs = True + try: + for t,s,e in self.scanString( instring ): + out.append( instring[lastE:s] ) + if t: + if isinstance(t,ParseResults): + out += t.asList() + elif isinstance(t,list): + out += t + else: + out.append(t) + lastE = e + out.append(instring[lastE:]) + return "".join(map(_ustr,out)) + except ParseBaseException, pe: + raise pe + + def searchString( self, instring, maxMatches=_MAX_INT ): + """Another extension to scanString, simplifying the access to the tokens found + to match the given parse expression. May be called with optional + maxMatches argument, to clip searching after 'n' matches are found. + """ + try: + return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) + except ParseBaseException, pe: + raise pe + + def __add__(self, other ): + """Implementation of + operator - returns And""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return And( [ self, other ] ) + + def __radd__(self, other ): + """Implementation of + operator when left operand is not a ParserElement""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other + self + + def __sub__(self, other): + """Implementation of - operator, returns And with error stop""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with 
ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return And( [ self, And._ErrorStop(), other ] ) + + def __rsub__(self, other ): + """Implementation of - operator when left operand is not a ParserElement""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other - self + + def __mul__(self,other): + if isinstance(other,int): + minElements, optElements = other,0 + elif isinstance(other,tuple): + other = (other + (None, None))[:2] + if other[0] is None: + other = (0, other[1]) + if isinstance(other[0],int) and other[1] is None: + if other[0] == 0: + return ZeroOrMore(self) + if other[0] == 1: + return OneOrMore(self) + else: + return self*other[0] + ZeroOrMore(self) + elif isinstance(other[0],int) and isinstance(other[1],int): + minElements, optElements = other + optElements -= minElements + else: + raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) + else: + raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) + + if minElements < 0: + raise ValueError("cannot multiply ParserElement by negative value") + if optElements < 0: + raise ValueError("second tuple value must be greater or equal to first tuple value") + if minElements == optElements == 0: + raise ValueError("cannot multiply ParserElement by 0 or (0,0)") + + if (optElements): + def makeOptionalList(n): + if n>1: + return Optional(self + makeOptionalList(n-1)) + else: + return Optional(self) + if minElements: + if minElements == 1: + ret = self + makeOptionalList(optElements) + else: + ret = And([self]*minElements) + makeOptionalList(optElements) + else: + ret = makeOptionalList(optElements) + else: + if minElements == 1: + ret = self + else: + ret = And([self]*minElements) + return ret + + def 
__rmul__(self, other): + return self.__mul__(other) + + def __or__(self, other ): + """Implementation of | operator - returns MatchFirst""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return MatchFirst( [ self, other ] ) + + def __ror__(self, other ): + """Implementation of | operator when left operand is not a ParserElement""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other | self + + def __xor__(self, other ): + """Implementation of ^ operator - returns Or""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return Or( [ self, other ] ) + + def __rxor__(self, other ): + """Implementation of ^ operator when left operand is not a ParserElement""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other ^ self + + def __and__(self, other ): + """Implementation of & operator - returns Each""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return Each( [ self, other ] ) + + def __rand__(self, other ): + """Implementation of & operator when left operand is not a ParserElement""" + if 
isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other & self + + def __invert__( self ): + """Implementation of ~ operator - returns NotAny""" + return NotAny( self ) + + def __call__(self, name): + """Shortcut for setResultsName, with listAllMatches=default:: + userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") + could be written as:: + userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") + """ + return self.setResultsName(name) + + def suppress( self ): + """Suppresses the output of this ParserElement; useful to keep punctuation from + cluttering up returned output. + """ + return Suppress( self ) + + def leaveWhitespace( self ): + """Disables the skipping of whitespace before matching the characters in the + ParserElement's defined pattern. This is normally only used internally by + the pyparsing module, but may be needed in some whitespace-sensitive grammars. + """ + self.skipWhitespace = False + return self + + def setWhitespaceChars( self, chars ): + """Overrides the default whitespace chars + """ + self.skipWhitespace = True + self.whiteChars = chars + self.copyDefaultWhiteChars = False + return self + + def parseWithTabs( self ): + """Overrides default behavior to expand s to spaces before parsing the input string. + Must be called before parseString when the input grammar contains elements that + match characters.""" + self.keepTabs = True + return self + + def ignore( self, other ): + """Define expression to be ignored (e.g., comments) while doing pattern + matching; may be called repeatedly, to define multiple comment or other + ignorable patterns. 
+ """ + if isinstance( other, Suppress ): + if other not in self.ignoreExprs: + self.ignoreExprs.append( other ) + else: + self.ignoreExprs.append( Suppress( other ) ) + return self + + def setDebugActions( self, startAction, successAction, exceptionAction ): + """Enable display of debugging messages while doing pattern matching.""" + self.debugActions = (startAction or _defaultStartDebugAction, + successAction or _defaultSuccessDebugAction, + exceptionAction or _defaultExceptionDebugAction) + self.debug = True + return self + + def setDebug( self, flag=True ): + """Enable display of debugging messages while doing pattern matching. + Set flag to True to enable, False to disable.""" + if flag: + self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) + else: + self.debug = False + return self + + def __str__( self ): + return self.name + + def __repr__( self ): + return _ustr(self) + + def streamline( self ): + self.streamlined = True + self.strRepr = None + return self + + def checkRecursion( self, parseElementList ): + pass + + def validate( self, validateTrace=[] ): + """Check defined expressions for valid structure, check for infinite recursive definitions.""" + self.checkRecursion( [] ) + + def parseFile( self, file_or_filename, parseAll=False ): + """Execute the parse expression on the given file or filename. + If a filename is specified (instead of a file object), + the entire file is opened, read, and closed before parsing. 
+ """ + try: + file_contents = file_or_filename.read() + except AttributeError: + f = open(file_or_filename, "rb") + file_contents = f.read() + f.close() + try: + return self.parseString(file_contents, parseAll) + except ParseBaseException, exc: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc + + def getException(self): + return ParseException("",0,self.errmsg,self) + + def __getattr__(self,aname): + if aname == "myException": + self.myException = ret = self.getException(); + return ret; + else: + raise AttributeError("no such attribute " + aname) + + def __eq__(self,other): + if isinstance(other, ParserElement): + return self is other or self.__dict__ == other.__dict__ + elif isinstance(other, basestring): + try: + self.parseString(_ustr(other), parseAll=True) + return True + except ParseBaseException: + return False + else: + return super(ParserElement,self)==other + + def __ne__(self,other): + return not (self == other) + + def __hash__(self): + return hash(id(self)) + + def __req__(self,other): + return self == other + + def __rne__(self,other): + return not (self == other) + + +class Token(ParserElement): + """Abstract ParserElement subclass, for defining atomic matching patterns.""" + def __init__( self ): + super(Token,self).__init__( savelist=False ) + #self.myException = ParseException("",0,"",self) + + def setName(self, name): + s = super(Token,self).setName(name) + self.errmsg = "Expected " + self.name + #s.myException.msg = self.errmsg + return s + + +class Empty(Token): + """An empty token, will always match.""" + def __init__( self ): + super(Empty,self).__init__() + self.name = "Empty" + self.mayReturnEmpty = True + self.mayIndexError = False + + +class NoMatch(Token): + """A token that will never match.""" + def __init__( self ): + super(NoMatch,self).__init__() + self.name = "NoMatch" + self.mayReturnEmpty = True + self.mayIndexError = False + self.errmsg = "Unmatchable token" + 
#self.myException.msg = self.errmsg + + def parseImpl( self, instring, loc, doActions=True ): + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + +class Literal(Token): + """Token to exactly match a specified string.""" + def __init__( self, matchString ): + super(Literal,self).__init__() + self.match = matchString + self.matchLen = len(matchString) + try: + self.firstMatchChar = matchString[0] + except IndexError: + warnings.warn("null string passed to Literal; use Empty() instead", + SyntaxWarning, stacklevel=2) + self.__class__ = Empty + self.name = '"%s"' % _ustr(self.match) + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = False + #self.myException.msg = self.errmsg + self.mayIndexError = False + + # Performance tuning: this routine gets called a *lot* + # if this is a single character match string and the first character matches, + # short-circuit as quickly as possible, and avoid calling startswith + #~ @profile + def parseImpl( self, instring, loc, doActions=True ): + if (instring[loc] == self.firstMatchChar and + (self.matchLen==1 or instring.startswith(self.match,loc)) ): + return loc+self.matchLen, self.match + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc +_L = Literal + +class Keyword(Token): + """Token to exactly match a specified string as a keyword, that is, it must be + immediately followed by a non-keyword character. Compare with Literal:: + Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. + Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' + Accepts two optional constructor arguments in addition to the keyword string: + identChars is a string of characters that would be valid identifier characters, + defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive + matching, default is False. 
+ """ + DEFAULT_KEYWORD_CHARS = alphanums+"_$" + + def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ): + super(Keyword,self).__init__() + self.match = matchString + self.matchLen = len(matchString) + try: + self.firstMatchChar = matchString[0] + except IndexError: + warnings.warn("null string passed to Keyword; use Empty() instead", + SyntaxWarning, stacklevel=2) + self.name = '"%s"' % self.match + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = False + #self.myException.msg = self.errmsg + self.mayIndexError = False + self.caseless = caseless + if caseless: + self.caselessmatch = matchString.upper() + identChars = identChars.upper() + self.identChars = _str2dict(identChars) + + def parseImpl( self, instring, loc, doActions=True ): + if self.caseless: + if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and + (loc == 0 or instring[loc-1].upper() not in self.identChars) ): + return loc+self.matchLen, self.match + else: + if (instring[loc] == self.firstMatchChar and + (self.matchLen==1 or instring.startswith(self.match,loc)) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and + (loc == 0 or instring[loc-1] not in self.identChars) ): + return loc+self.matchLen, self.match + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + def copy(self): + c = super(Keyword,self).copy() + c.identChars = Keyword.DEFAULT_KEYWORD_CHARS + return c + + def setDefaultKeywordChars( chars ): + """Overrides the default Keyword chars + """ + Keyword.DEFAULT_KEYWORD_CHARS = chars + setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) + +class CaselessLiteral(Literal): + """Token to match a specified string, ignoring case of letters. 
+ Note: the matched results will always be in the case of the given + match string, NOT the case of the input text. + """ + def __init__( self, matchString ): + super(CaselessLiteral,self).__init__( matchString.upper() ) + # Preserve the defining literal. + self.returnString = matchString + self.name = "'%s'" % self.returnString + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + + def parseImpl( self, instring, loc, doActions=True ): + if instring[ loc:loc+self.matchLen ].upper() == self.match: + return loc+self.matchLen, self.returnString + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class CaselessKeyword(Keyword): + def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): + super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) + + def parseImpl( self, instring, loc, doActions=True ): + if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): + return loc+self.matchLen, self.match + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class Word(Token): + """Token for matching words composed of allowed character sets. + Defined with string containing all allowed initial characters, + an optional string containing allowed body characters (if omitted, + defaults to the initial character set), and an optional minimum, + maximum, and/or exact length. The default value for min is 1 (a + minimum value < 1 is not valid); the default values for max and exact + are 0, meaning no maximum or exact length restriction. 
+ """ + def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ): + super(Word,self).__init__() + self.initCharsOrig = initChars + self.initChars = _str2dict(initChars) + if bodyChars : + self.bodyCharsOrig = bodyChars + self.bodyChars = _str2dict(bodyChars) + else: + self.bodyCharsOrig = initChars + self.bodyChars = _str2dict(initChars) + + self.maxSpecified = max > 0 + + if min < 1: + raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + self.mayIndexError = False + self.asKeyword = asKeyword + + if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): + if self.bodyCharsOrig == self.initCharsOrig: + self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) + elif len(self.bodyCharsOrig) == 1: + self.reString = "%s[%s]*" % \ + (re.escape(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) + else: + self.reString = "[%s][%s]*" % \ + (_escapeRegexRangeChars(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) + if self.asKeyword: + self.reString = r"\b"+self.reString+r"\b" + try: + self.re = re.compile( self.reString ) + except: + self.re = None + + def parseImpl( self, instring, loc, doActions=True ): + if self.re: + result = self.re.match(instring,loc) + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + return loc,result.group() + + if not(instring[ loc ] in self.initChars): + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + start = loc + loc += 1 + instrlen = len(instring) + bodychars = self.bodyChars 
+ maxloc = start + self.maxLen + maxloc = min( maxloc, instrlen ) + while loc < maxloc and instring[loc] in bodychars: + loc += 1 + + throwException = False + if loc - start < self.minLen: + throwException = True + if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: + throwException = True + if self.asKeyword: + if (start>0 and instring[start-1] in bodychars) or (loc4: + return s[:4]+"..." + else: + return s + + if ( self.initCharsOrig != self.bodyCharsOrig ): + self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) + else: + self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) + + return self.strRepr + + +class Regex(Token): + """Token for matching strings that match a given regular expression. + Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. + """ + def __init__( self, pattern, flags=0): + """The parameters pattern and flags are passed to the re.compile() function as-is. 
See the Python re module for an explanation of the acceptable patterns and flags.""" + super(Regex,self).__init__() + + if len(pattern) == 0: + warnings.warn("null string passed to Regex; use Empty() instead", + SyntaxWarning, stacklevel=2) + + self.pattern = pattern + self.flags = flags + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + except sre_constants.error: + warnings.warn("invalid pattern (%s) passed to Regex" % pattern, + SyntaxWarning, stacklevel=2) + raise + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + self.mayIndexError = False + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + result = self.re.match(instring,loc) + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + d = result.groupdict() + ret = ParseResults(result.group()) + if d: + for k in d: + ret[k] = d[k] + return loc,ret + + def __str__( self ): + try: + return super(Regex,self).__str__() + except: + pass + + if self.strRepr is None: + self.strRepr = "Re:(%s)" % repr(self.pattern) + + return self.strRepr + + +class QuotedString(Token): + """Token for matching strings that are delimited by quoting characters. 
+ """ + def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): + """ + Defined with the following parameters: + - quoteChar - string of one or more characters defining the quote delimiting string + - escChar - character to escape quotes, typically backslash (default=None) + - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) + - multiline - boolean indicating whether quotes can span multiple lines (default=False) + - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) + - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) + """ + super(QuotedString,self).__init__() + + # remove white space from quote chars - wont work anyway + quoteChar = quoteChar.strip() + if len(quoteChar) == 0: + warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + raise SyntaxError() + + if endQuoteChar is None: + endQuoteChar = quoteChar + else: + endQuoteChar = endQuoteChar.strip() + if len(endQuoteChar) == 0: + warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + raise SyntaxError() + + self.quoteChar = quoteChar + self.quoteCharLen = len(quoteChar) + self.firstQuoteChar = quoteChar[0] + self.endQuoteChar = endQuoteChar + self.endQuoteCharLen = len(endQuoteChar) + self.escChar = escChar + self.escQuote = escQuote + self.unquoteResults = unquoteResults + + if multiline: + self.flags = re.MULTILINE | re.DOTALL + self.pattern = r'%s(?:[^%s%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) + else: + self.flags = 0 + self.pattern = r'%s(?:[^%s\n\r%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and 
_escapeRegexRangeChars(escChar) or '') ) + if len(self.endQuoteChar) > 1: + self.pattern += ( + '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), + _escapeRegexRangeChars(self.endQuoteChar[i])) + for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' + ) + if escQuote: + self.pattern += (r'|(?:%s)' % re.escape(escQuote)) + if escChar: + self.pattern += (r'|(?:%s.)' % re.escape(escChar)) + self.escCharReplacePattern = re.escape(self.escChar)+"(.)" + self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + except sre_constants.error: + warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, + SyntaxWarning, stacklevel=2) + raise + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + self.mayIndexError = False + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + ret = result.group() + + if self.unquoteResults: + + # strip off quotes + ret = ret[self.quoteCharLen:-self.endQuoteCharLen] + + if isinstance(ret,basestring): + # replace escaped characters + if self.escChar: + ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) + + # replace escaped quotes + if self.escQuote: + ret = ret.replace(self.escQuote, self.endQuoteChar) + + return loc, ret + + def __str__( self ): + try: + return super(QuotedString,self).__str__() + except: + pass + + if self.strRepr is None: + self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) + + return self.strRepr + + +class CharsNotIn(Token): + """Token for matching words composed of characters *not* in a given set. 
+ Defined with string containing all disallowed characters, and an optional + minimum, maximum, and/or exact length. The default value for min is 1 (a + minimum value < 1 is not valid); the default values for max and exact + are 0, meaning no maximum or exact length restriction. + """ + def __init__( self, notChars, min=1, max=0, exact=0 ): + super(CharsNotIn,self).__init__() + self.skipWhitespace = False + self.notChars = notChars + + if min < 1: + raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = ( self.minLen == 0 ) + #self.myException.msg = self.errmsg + self.mayIndexError = False + + def parseImpl( self, instring, loc, doActions=True ): + if instring[loc] in self.notChars: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + start = loc + loc += 1 + notchars = self.notChars + maxlen = min( start+self.maxLen, len(instring) ) + while loc < maxlen and \ + (instring[loc] not in notchars): + loc += 1 + + if loc - start < self.minLen: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + return loc, instring[start:loc] + + def __str__( self ): + try: + return super(CharsNotIn, self).__str__() + except: + pass + + if self.strRepr is None: + if len(self.notChars) > 4: + self.strRepr = "!W:(%s...)" % self.notChars[:4] + else: + self.strRepr = "!W:(%s)" % self.notChars + + return self.strRepr + +class White(Token): + """Special matching class for matching whitespace. Normally, whitespace is ignored + by pyparsing grammars. This class is included when some whitespace structures + are significant. 
Define with a string containing the whitespace characters to be + matched; default is " \\t\\r\\n". Also takes optional min, max, and exact arguments, + as defined for the Word class.""" + whiteStrs = { + " " : "", + "\t": "", + "\n": "", + "\r": "", + "\f": "", + } + def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): + super(White,self).__init__() + self.matchWhite = ws + self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) + #~ self.leaveWhitespace() + self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) + self.mayReturnEmpty = True + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + def parseImpl( self, instring, loc, doActions=True ): + if not(instring[ loc ] in self.matchWhite): + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + start = loc + loc += 1 + maxloc = start + self.maxLen + maxloc = min( maxloc, len(instring) ) + while loc < maxloc and instring[loc] in self.matchWhite: + loc += 1 + + if loc - start < self.minLen: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + return loc, instring[start:loc] + + +class _PositionToken(Token): + def __init__( self ): + super(_PositionToken,self).__init__() + self.name=self.__class__.__name__ + self.mayReturnEmpty = True + self.mayIndexError = False + +class GoToColumn(_PositionToken): + """Token to advance to a specific column of input text; useful for tabular report scraping.""" + def __init__( self, colno ): + super(GoToColumn,self).__init__() + self.col = colno + + def preParse( self, instring, loc ): + if col(loc,instring) != self.col: + instrlen = len(instring) + if self.ignoreExprs: + loc = 
self._skipIgnorables( instring, loc ) + while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : + loc += 1 + return loc + + def parseImpl( self, instring, loc, doActions=True ): + thiscol = col( loc, instring ) + if thiscol > self.col: + raise ParseException( instring, loc, "Text not in expected column", self ) + newloc = loc + self.col - thiscol + ret = instring[ loc: newloc ] + return newloc, ret + +class LineStart(_PositionToken): + """Matches if current position is at the beginning of a line within the parse string""" + def __init__( self ): + super(LineStart,self).__init__() + self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) + self.errmsg = "Expected start of line" + #self.myException.msg = self.errmsg + + def preParse( self, instring, loc ): + preloc = super(LineStart,self).preParse(instring,loc) + if instring[preloc] == "\n": + loc += 1 + return loc + + def parseImpl( self, instring, loc, doActions=True ): + if not( loc==0 or + (loc == self.preParse( instring, 0 )) or + (instring[loc-1] == "\n") ): #col(loc, instring) != 1: + #~ raise ParseException( instring, loc, "Expected start of line" ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + +class LineEnd(_PositionToken): + """Matches if current position is at the end of a line within the parse string""" + def __init__( self ): + super(LineEnd,self).__init__() + self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) + self.errmsg = "Expected end of line" + #self.myException.msg = self.errmsg + + def parseImpl( self, instring, loc, doActions=True ): + if loc len(instring): + return loc, [] + else: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class WordStart(_PositionToken): + """Matches if the current position is at the beginning of a Word, and + is not preceded by any character in a given set of wordChars + (default=printables). 
To emulate the \b behavior of regular expressions, + use WordStart(alphanums). WordStart will also match at the beginning of + the string being parsed, or at the beginning of a line. + """ + def __init__(self, wordChars = printables): + super(WordStart,self).__init__() + self.wordChars = _str2dict(wordChars) + self.errmsg = "Not at the start of a word" + + def parseImpl(self, instring, loc, doActions=True ): + if loc != 0: + if (instring[loc-1] in self.wordChars or + instring[loc] not in self.wordChars): + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + +class WordEnd(_PositionToken): + """Matches if the current position is at the end of a Word, and + is not followed by any character in a given set of wordChars + (default=printables). To emulate the \b behavior of regular expressions, + use WordEnd(alphanums). WordEnd will also match at the end of + the string being parsed, or at the end of a line. + """ + def __init__(self, wordChars = printables): + super(WordEnd,self).__init__() + self.wordChars = _str2dict(wordChars) + self.skipWhitespace = False + self.errmsg = "Not at the end of a word" + + def parseImpl(self, instring, loc, doActions=True ): + instrlen = len(instring) + if instrlen>0 and loc maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring,len(instring),e.errmsg,self) + maxExcLoc = len(instring) + else: + if loc2 > maxMatchLoc: + maxMatchLoc = loc2 + maxMatchExp = e + + if maxMatchLoc < 0: + if maxException is not None: + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + return maxMatchExp._parse( instring, loc, doActions ) + + def __ixor__(self, other ): + if isinstance( other, basestring ): + other = Literal( other ) + return self.append( other ) #Or( [ self, other ] ) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if 
self.strRepr is None: + self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class MatchFirst(ParseExpression): + """Requires that at least one ParseExpression is found. + If two expressions match, the first one listed is the one that will match. + May be constructed using the '|' operator. + """ + def __init__( self, exprs, savelist = False ): + super(MatchFirst,self).__init__(exprs, savelist) + if exprs: + self.mayReturnEmpty = False + for e in self.exprs: + if e.mayReturnEmpty: + self.mayReturnEmpty = True + break + else: + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + maxExcLoc = -1 + maxException = None + for e in self.exprs: + try: + ret = e._parse( instring, loc, doActions ) + return ret + except ParseException, err: + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring,len(instring),e.errmsg,self) + maxExcLoc = len(instring) + + # only got here if no expression matched, raise exception for match that made it the furthest + else: + if maxException is not None: + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + def __ior__(self, other ): + if isinstance( other, basestring ): + other = Literal( other ) + return self.append( other ) #MatchFirst( [ self, other ] ) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class 
Each(ParseExpression): + """Requires all given ParseExpressions to be found, but in any order. + Expressions may be separated by whitespace. + May be constructed using the '&' operator. + """ + def __init__( self, exprs, savelist = True ): + super(Each,self).__init__(exprs, savelist) + self.mayReturnEmpty = True + for e in self.exprs: + if not e.mayReturnEmpty: + self.mayReturnEmpty = False + break + self.skipWhitespace = True + self.initExprGroups = True + + def parseImpl( self, instring, loc, doActions=True ): + if self.initExprGroups: + self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ] + self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] + self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] + self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] + self.required += self.multirequired + self.initExprGroups = False + tmpLoc = loc + tmpReqd = self.required[:] + tmpOpt = self.optionals[:] + matchOrder = [] + + keepMatching = True + while keepMatching: + tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired + failed = [] + for e in tmpExprs: + try: + tmpLoc = e.tryParse( instring, tmpLoc ) + except ParseException: + failed.append(e) + else: + matchOrder.append(e) + if e in tmpReqd: + tmpReqd.remove(e) + elif e in tmpOpt: + tmpOpt.remove(e) + if len(failed) == len(tmpExprs): + keepMatching = False + + if tmpReqd: + missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) + raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) + + # add any unmatched Optionals, in case they have default values defined + matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt) + + resultlist = [] + for e in matchOrder: + loc,results = e._parse(instring,loc,doActions) + resultlist.append(results) + + finalResults = ParseResults([]) + for r in resultlist: + dups = {} + for k in r.keys(): + 
if k in finalResults.keys(): + tmp = ParseResults(finalResults[k]) + tmp += ParseResults(r[k]) + dups[k] = tmp + finalResults += ParseResults(r) + for k,v in dups.items(): + finalResults[k] = v + return loc, finalResults + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class ParseElementEnhance(ParserElement): + """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" + def __init__( self, expr, savelist=False ): + super(ParseElementEnhance,self).__init__(savelist) + if isinstance( expr, basestring ): + expr = Literal(expr) + self.expr = expr + self.strRepr = None + if expr is not None: + self.mayIndexError = expr.mayIndexError + self.mayReturnEmpty = expr.mayReturnEmpty + self.setWhitespaceChars( expr.whiteChars ) + self.skipWhitespace = expr.skipWhitespace + self.saveAsList = expr.saveAsList + self.callPreparse = expr.callPreparse + self.ignoreExprs.extend(expr.ignoreExprs) + + def parseImpl( self, instring, loc, doActions=True ): + if self.expr is not None: + return self.expr._parse( instring, loc, doActions, callPreParse=False ) + else: + raise ParseException("",loc,self.errmsg,self) + + def leaveWhitespace( self ): + self.skipWhitespace = False + self.expr = self.expr.copy() + if self.expr is not None: + self.expr.leaveWhitespace() + return self + + def ignore( self, other ): + if isinstance( other, Suppress ): + if other not in self.ignoreExprs: + super( ParseElementEnhance, self).ignore( other ) + if self.expr is not None: + self.expr.ignore( self.ignoreExprs[-1] ) + else: + super( ParseElementEnhance, self).ignore( other ) + if self.expr is not None: + self.expr.ignore( self.ignoreExprs[-1] ) + return self + + 
def streamline( self ): + super(ParseElementEnhance,self).streamline() + if self.expr is not None: + self.expr.streamline() + return self + + def checkRecursion( self, parseElementList ): + if self in parseElementList: + raise RecursiveGrammarException( parseElementList+[self] ) + subRecCheckList = parseElementList[:] + [ self ] + if self.expr is not None: + self.expr.checkRecursion( subRecCheckList ) + + def validate( self, validateTrace=[] ): + tmp = validateTrace[:]+[self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion( [] ) + + def __str__( self ): + try: + return super(ParseElementEnhance,self).__str__() + except: + pass + + if self.strRepr is None and self.expr is not None: + self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) + return self.strRepr + + +class FollowedBy(ParseElementEnhance): + """Lookahead matching of the given parse expression. FollowedBy + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression matches at the current + position. FollowedBy always returns a null token list.""" + def __init__( self, expr ): + super(FollowedBy,self).__init__(expr) + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + self.expr.tryParse( instring, loc ) + return loc, [] + + +class NotAny(ParseElementEnhance): + """Lookahead to disallow matching with the given parse expression. NotAny + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression does *not* match at the current + position. Also, NotAny does *not* skip over leading whitespace. NotAny + always returns a null token list. 
May be constructed using the '~' operator.""" + def __init__( self, expr ): + super(NotAny,self).__init__(expr) + #~ self.leaveWhitespace() + self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs + self.mayReturnEmpty = True + self.errmsg = "Found unwanted token, "+_ustr(self.expr) + #self.myException = ParseException("",0,self.errmsg,self) + + def parseImpl( self, instring, loc, doActions=True ): + try: + self.expr.tryParse( instring, loc ) + except (ParseException,IndexError): + pass + else: + #~ raise ParseException(instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "~{" + _ustr(self.expr) + "}" + + return self.strRepr + + +class ZeroOrMore(ParseElementEnhance): + """Optional repetition of zero or more of the given expression.""" + def __init__( self, expr ): + super(ZeroOrMore,self).__init__(expr) + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + tokens = [] + try: + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) + while 1: + if hasIgnoreExprs: + preloc = self._skipIgnorables( instring, loc ) + else: + preloc = loc + loc, tmptokens = self.expr._parse( instring, preloc, doActions ) + if tmptokens or tmptokens.keys(): + tokens += tmptokens + except (ParseException,IndexError): + pass + + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]..." 
+ + return self.strRepr + + def setResultsName( self, name, listAllMatches=False ): + ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches) + ret.saveAsList = True + return ret + + +class OneOrMore(ParseElementEnhance): + """Repetition of one or more of the given expression.""" + def parseImpl( self, instring, loc, doActions=True ): + # must be at least one + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + try: + hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) + while 1: + if hasIgnoreExprs: + preloc = self._skipIgnorables( instring, loc ) + else: + preloc = loc + loc, tmptokens = self.expr._parse( instring, preloc, doActions ) + if tmptokens or tmptokens.keys(): + tokens += tmptokens + except (ParseException,IndexError): + pass + + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + _ustr(self.expr) + "}..." + + return self.strRepr + + def setResultsName( self, name, listAllMatches=False ): + ret = super(OneOrMore,self).setResultsName(name,listAllMatches) + ret.saveAsList = True + return ret + +class _NullToken(object): + def __bool__(self): + return False + __nonzero__ = __bool__ + def __str__(self): + return "" + +_optionalNotMatched = _NullToken() +class Optional(ParseElementEnhance): + """Optional matching of the given expression. + A default return string can also be specified, if the optional expression + is not found. 
+ """ + def __init__( self, exprs, default=_optionalNotMatched ): + super(Optional,self).__init__( exprs, savelist=False ) + self.defaultValue = default + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + try: + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + except (ParseException,IndexError): + if self.defaultValue is not _optionalNotMatched: + if self.expr.resultsName: + tokens = ParseResults([ self.defaultValue ]) + tokens[self.expr.resultsName] = self.defaultValue + else: + tokens = [ self.defaultValue ] + else: + tokens = [] + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]" + + return self.strRepr + + +class SkipTo(ParseElementEnhance): + """Token for skipping over all undefined text until the matched expression is found. + If include is set to true, the matched expression is also parsed (the skipped text + and matched expression are returned as a 2-element list). The ignore + argument is used to define grammars (typically quoted strings and comments) that + might contain false matches. 
+ """ + def __init__( self, other, include=False, ignore=None, failOn=None ): + super( SkipTo, self ).__init__( other ) + self.ignoreExpr = ignore + self.mayReturnEmpty = True + self.mayIndexError = False + self.includeMatch = include + self.asList = False + if failOn is not None and isinstance(failOn, basestring): + self.failOn = Literal(failOn) + else: + self.failOn = failOn + self.errmsg = "No match found for "+_ustr(self.expr) + #self.myException = ParseException("",0,self.errmsg,self) + + def parseImpl( self, instring, loc, doActions=True ): + startLoc = loc + instrlen = len(instring) + expr = self.expr + failParse = False + while loc <= instrlen: + try: + if self.failOn: + try: + self.failOn.tryParse(instring, loc) + except ParseBaseException: + pass + else: + failParse = True + raise ParseException(instring, loc, "Found expression " + str(self.failOn)) + failParse = False + if self.ignoreExpr is not None: + while 1: + try: + loc = self.ignoreExpr.tryParse(instring,loc) + print "found ignoreExpr, advance to", loc + except ParseBaseException: + break + expr._parse( instring, loc, doActions=False, callPreParse=False ) + skipText = instring[startLoc:loc] + if self.includeMatch: + loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) + if mat: + skipRes = ParseResults( skipText ) + skipRes += mat + return loc, [ skipRes ] + else: + return loc, [ skipText ] + else: + return loc, [ skipText ] + except (ParseException,IndexError): + if failParse: + raise + else: + loc += 1 + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class Forward(ParseElementEnhance): + """Forward declaration of an expression to be defined later - + used for recursive grammars, such as algebraic infix notation. + When the expression is known, it is assigned to the Forward variable using the '<<' operator. + + Note: take care when assigning to Forward not to overlook precedence of operators. 
+ Specifically, '|' has a lower precedence than '<<', so that:: + fwdExpr << a | b | c + will actually be evaluated as:: + (fwdExpr << a) | b | c + thereby leaving b and c out as parseable alternatives. It is recommended that you + explicitly group the values inserted into the Forward:: + fwdExpr << (a | b | c) + """ + def __init__( self, other=None ): + super(Forward,self).__init__( other, savelist=False ) + + def __lshift__( self, other ): + if isinstance( other, basestring ): + other = Literal(other) + self.expr = other + self.mayReturnEmpty = other.mayReturnEmpty + self.strRepr = None + self.mayIndexError = self.expr.mayIndexError + self.mayReturnEmpty = self.expr.mayReturnEmpty + self.setWhitespaceChars( self.expr.whiteChars ) + self.skipWhitespace = self.expr.skipWhitespace + self.saveAsList = self.expr.saveAsList + self.ignoreExprs.extend(self.expr.ignoreExprs) + return None + + def leaveWhitespace( self ): + self.skipWhitespace = False + return self + + def streamline( self ): + if not self.streamlined: + self.streamlined = True + if self.expr is not None: + self.expr.streamline() + return self + + def validate( self, validateTrace=[] ): + if self not in validateTrace: + tmp = validateTrace[:]+[self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion([]) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + self._revertClass = self.__class__ + self.__class__ = _ForwardNoRecurse + try: + if self.expr is not None: + retString = _ustr(self.expr) + else: + retString = "None" + finally: + self.__class__ = self._revertClass + return self.__class__.__name__ + ": " + retString + + def copy(self): + if self.expr is not None: + return super(Forward,self).copy() + else: + ret = Forward() + ret << self + return ret + +class _ForwardNoRecurse(Forward): + def __str__( self ): + return "..." 
+ +class TokenConverter(ParseElementEnhance): + """Abstract subclass of ParseExpression, for converting parsed results.""" + def __init__( self, expr, savelist=False ): + super(TokenConverter,self).__init__( expr )#, savelist ) + self.saveAsList = False + +class Upcase(TokenConverter): + """Converter to upper case all matching tokens.""" + def __init__(self, *args): + super(Upcase,self).__init__(*args) + warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", + DeprecationWarning,stacklevel=2) + + def postParse( self, instring, loc, tokenlist ): + return list(map( string.upper, tokenlist )) + + +class Combine(TokenConverter): + """Converter to concatenate all matching tokens to a single string. + By default, the matching patterns must also be contiguous in the input string; + this can be disabled by specifying 'adjacent=False' in the constructor. + """ + def __init__( self, expr, joinString="", adjacent=True ): + super(Combine,self).__init__( expr ) + # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself + if adjacent: + self.leaveWhitespace() + self.adjacent = adjacent + self.skipWhitespace = True + self.joinString = joinString + + def ignore( self, other ): + if self.adjacent: + ParserElement.ignore(self, other) + else: + super( Combine, self).ignore( other ) + return self + + def postParse( self, instring, loc, tokenlist ): + retToks = tokenlist.copy() + del retToks[:] + retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) + + if self.resultsName and len(retToks.keys())>0: + return [ retToks ] + else: + return retToks + +class Group(TokenConverter): + """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions.""" + def __init__( self, expr ): + super(Group,self).__init__( expr ) + self.saveAsList = True + + def postParse( self, instring, loc, tokenlist ): + return [ tokenlist 
] + +class Dict(TokenConverter): + """Converter to return a repetitive expression as a list, but also as a dictionary. + Each element can also be referenced using the first token in the expression as its key. + Useful for tabular report scraping when the first column can be used as a item key. + """ + def __init__( self, exprs ): + super(Dict,self).__init__( exprs ) + self.saveAsList = True + + def postParse( self, instring, loc, tokenlist ): + for i,tok in enumerate(tokenlist): + if len(tok) == 0: + continue + ikey = tok[0] + if isinstance(ikey,int): + ikey = _ustr(tok[0]).strip() + if len(tok)==1: + tokenlist[ikey] = _ParseResultsWithOffset("",i) + elif len(tok)==2 and not isinstance(tok[1],ParseResults): + tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) + else: + dictvalue = tok.copy() #ParseResults(i) + del dictvalue[0] + if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()): + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) + else: + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) + + if self.resultsName: + return [ tokenlist ] + else: + return tokenlist + + +class Suppress(TokenConverter): + """Converter for ignoring the results of a parsed expression.""" + def postParse( self, instring, loc, tokenlist ): + return [] + + def suppress( self ): + return self + + +class OnlyOnce(object): + """Wrapper for parse actions, to ensure they are only called once.""" + def __init__(self, methodCall): + self.callable = ParserElement._normalizeParseActionArgs(methodCall) + self.called = False + def __call__(self,s,l,t): + if not self.called: + results = self.callable(s,l,t) + self.called = True + return results + raise ParseException(s,l,"") + def reset(self): + self.called = False + +def traceParseAction(f): + """Decorator for debugging parse actions.""" + f = ParserElement._normalizeParseActionArgs(f) + def z(*paArgs): + thisFunc = f.func_name + s,l,t = paArgs[-3:] + if len(paArgs)>3: + thisFunc = 
paArgs[0].__class__.__name__ + '.' + thisFunc + sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) + try: + ret = f(*paArgs) + except Exception, exc: + sys.stderr.write( "<", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) + try: + if len(symbols)==len("".join(symbols)): + return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) ) + else: + return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) ) + except: + warnings.warn("Exception creating Regex for oneOf, building MatchFirst", + SyntaxWarning, stacklevel=2) + + + # last resort, just use MatchFirst + return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) + +def dictOf( key, value ): + """Helper to easily and clearly define a dictionary by specifying the respective patterns + for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens + in the proper order. The key pattern can include delimiting markers or punctuation, + as long as they are suppressed, thereby leaving the significant key text. The value + pattern can include named results, so that the Dict results can include named token + fields. + """ + return Dict( ZeroOrMore( Group ( key + value ) ) ) + +def originalTextFor(expr, asString=True): + """Helper to return the original, untokenized text for a given expression. Useful to + restore the parsed fields of an HTML start tag into the raw tag text itself, or to + revert separate tokens with intervening whitespace back to the original matching + input text. Simpler to use than the parse action keepOriginalText, and does not + require the inspect module to chase up the call stack. By default, returns a + string containing the original parsed text. + + If the optional asString argument is passed as False, then the return value is a + ParseResults containing any results names that were originally matched, and a + single token containing the original matched text from the input string. 
So if + the expression passed to originalTextFor contains expressions with defined + results names, you must set asString to False if you want to preserve those + results name values.""" + locMarker = Empty().setParseAction(lambda s,loc,t: loc) + matchExpr = locMarker("_original_start") + expr + locMarker("_original_end") + if asString: + extractText = lambda s,l,t: s[t._original_start:t._original_end] + else: + def extractText(s,l,t): + del t[:] + t.insert(0, s[t._original_start:t._original_end]) + del t["_original_start"] + del t["_original_end"] + matchExpr.setParseAction(extractText) + return matchExpr + +# convenience constants for positional expressions +empty = Empty().setName("empty") +lineStart = LineStart().setName("lineStart") +lineEnd = LineEnd().setName("lineEnd") +stringStart = StringStart().setName("stringStart") +stringEnd = StringEnd().setName("stringEnd") + +_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) +_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) +_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16))) +_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8))) +_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) +_charRange = Group(_singleChar + Suppress("-") + _singleChar) +_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" + +_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) + +def srange(s): + r"""Helper to easily define string ranges for use in Word construction. 
Borrows + syntax from regexp '[]' string range definitions:: + srange("[0-9]") -> "0123456789" + srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" + srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" + The input string must be enclosed in []'s, and the returned string is the expanded + character set joined into a single string. + The values enclosed in the []'s may be:: + a single character + an escaped character with a leading backslash (such as \- or \]) + an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) + an escaped octal character with a leading '\0' (\041, which is a '!' character) + a range of any of the above, separated by a dash ('a-z', etc.) + any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) + """ + try: + return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) + except: + return "" + +def matchOnlyAtCol(n): + """Helper method for defining parse actions that require matching at a specific + column in the input text. + """ + def verifyCol(strg,locn,toks): + if col(locn,strg) != n: + raise ParseException(strg,locn,"matched token not at column %d" % n) + return verifyCol + +def replaceWith(replStr): + """Helper method for common parse actions that simply return a literal value. Especially + useful when used with transformString(). + """ + def _replFunc(*args): + return [replStr] + return _replFunc + +def removeQuotes(s,l,t): + """Helper parse action for removing quotation marks from parsed quoted strings. 
+ To use, add this parse action to quoted string using:: + quotedString.setParseAction( removeQuotes ) + """ + return t[0][1:-1] + +def upcaseTokens(s,l,t): + """Helper parse action to convert tokens to upper case.""" + return [ tt.upper() for tt in map(_ustr,t) ] + +def downcaseTokens(s,l,t): + """Helper parse action to convert tokens to lower case.""" + return [ tt.lower() for tt in map(_ustr,t) ] + +def keepOriginalText(s,startLoc,t): + """Helper parse action to preserve original parsed text, + overriding any nested parse actions.""" + try: + endloc = getTokensEndLoc() + except ParseException: + raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action") + del t[:] + t += ParseResults(s[startLoc:endloc]) + return t + +def getTokensEndLoc(): + """Method to be called from within a parse action to determine the end + location of the parsed tokens.""" + import inspect + fstack = inspect.stack() + try: + # search up the stack (through intervening argument normalizers) for correct calling routine + for f in fstack[2:]: + if f[3] == "_parseNoCache": + endloc = f[0].f_locals["loc"] + return endloc + else: + raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") + finally: + del fstack + +def _makeTags(tagStr, xml): + """Internal helper to construct opening and closing tag expressions, given a tag name""" + if isinstance(tagStr,basestring): + resname = tagStr + tagStr = Keyword(tagStr, caseless=not xml) + else: + resname = tagStr.name + + tagAttrName = Word(alphas,alphanums+"_-:") + if (xml): + tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) + openTag = Suppress("<") + tagStr + \ + Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + else: + printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) + 
tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) + openTag = Suppress("<") + tagStr + \ + Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ + Optional( Suppress("=") + tagAttrValue ) ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + closeTag = Combine(_L("") + + openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr) + closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % tagStr) + + return openTag, closeTag + +def makeHTMLTags(tagStr): + """Helper to construct opening and closing tag expressions for HTML, given a tag name""" + return _makeTags( tagStr, False ) + +def makeXMLTags(tagStr): + """Helper to construct opening and closing tag expressions for XML, given a tag name""" + return _makeTags( tagStr, True ) + +def withAttribute(*args,**attrDict): + """Helper to create a validating parse action to be used with start tags created + with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag + with a required attribute value, to avoid false matches on common tags such as + or
. + + Call withAttribute with a series of attribute names and values. Specify the list + of filter attributes names and values as: + - keyword arguments, as in (class="Customer",align="right"), or + - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) + For attribute names with a namespace prefix, you must use the second form. Attribute + names are matched insensitive to upper/lower case. + + To verify that the attribute exists, but without specifying a value, pass + withAttribute.ANY_VALUE as the value. + """ + if args: + attrs = args[:] + else: + attrs = attrDict.items() + attrs = [(k,v) for k,v in attrs] + def pa(s,l,tokens): + for attrName,attrValue in attrs: + if attrName not in tokens: + raise ParseException(s,l,"no matching attribute " + attrName) + if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: + raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % + (attrName, tokens[attrName], attrValue)) + return pa +withAttribute.ANY_VALUE = object() + +opAssoc = _Constants() +opAssoc.LEFT = object() +opAssoc.RIGHT = object() + +def operatorPrecedence( baseExpr, opList ): + """Helper method for constructing grammars of expressions made up of + operators working in a precedence hierarchy. Operators may be unary or + binary, left- or right-associative. Parse actions can also be attached + to operator expressions. 
+ + Parameters: + - baseExpr - expression representing the most basic element for the nested + - opList - list of tuples, one for each operator precedence level in the + expression grammar; each tuple is of the form + (opExpr, numTerms, rightLeftAssoc, parseAction), where: + - opExpr is the pyparsing expression for the operator; + may also be a string, which will be converted to a Literal; + if numTerms is 3, opExpr is a tuple of two expressions, for the + two operators separating the 3 terms + - numTerms is the number of terms for this operator (must + be 1, 2, or 3) + - rightLeftAssoc is the indicator whether the operator is + right or left associative, using the pyparsing-defined + constants opAssoc.RIGHT and opAssoc.LEFT. + - parseAction is the parse action to be associated with + expressions matching this operator expression (the + parse action tuple member may be omitted) + """ + ret = Forward() + lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) + for i,operDef in enumerate(opList): + opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] + if arity == 3: + if opExpr is None or len(opExpr) != 2: + raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") + opExpr1, opExpr2 = opExpr + thisExpr = Forward()#.setName("expr%d" % i) + if rightLeftAssoc == opAssoc.LEFT: + if arity == 1: + matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) + else: + matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ + Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + elif rightLeftAssoc == opAssoc.RIGHT: + if arity == 1: + # try to 
avoid LR with this extra test + if not isinstance(opExpr, Optional): + opExpr = Optional(opExpr) + matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) + else: + matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ + Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + else: + raise ValueError("operator must indicate right or left associativity") + if pa: + matchExpr.setParseAction( pa ) + thisExpr << ( matchExpr | lastExpr ) + lastExpr = thisExpr + ret << lastExpr + return ret + +dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") +sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") +quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") +unicodeString = Combine(_L('u') + quotedString.copy()) + +def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): + """Helper method for defining nested lists enclosed in opening and closing + delimiters ("(" and ")" are the default). 
+ + Parameters: + - opener - opening character for a nested list (default="("); can also be a pyparsing expression + - closer - closing character for a nested list (default=")"); can also be a pyparsing expression + - content - expression for items within the nested lists (default=None) + - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) + + If an expression is not provided for the content argument, the nested + expression will capture all whitespace-delimited content between delimiters + as a list of separate values. + + Use the ignoreExpr argument to define expressions that may contain + opening or closing characters that should not be treated as opening + or closing characters for nesting, such as quotedString or a comment + expression. Specify multiple expressions using an Or or MatchFirst. + The default is quotedString, but if no expressions are to be ignored, + then pass None for this argument. + """ + if opener == closer: + raise ValueError("opening and closing strings cannot be the same") + if content is None: + if isinstance(opener,basestring) and isinstance(closer,basestring): + if len(opener) == 1 and len(closer)==1: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS + ).setParseAction(lambda t:t[0].strip())) + else: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + ~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + raise ValueError("opening and closing arguments must be strings if no content 
expression is given") + ret = Forward() + if ignoreExpr is not None: + ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) + else: + ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) + return ret + +def indentedBlock(blockStatementExpr, indentStack, indent=True): + """Helper method for defining space-delimited indentation blocks, such as + those used to define block statements in Python source code. + + Parameters: + - blockStatementExpr - expression defining syntax of statement that + is repeated within the indented block + - indentStack - list created by caller to manage indentation stack + (multiple statementWithIndentedBlock expressions within a single grammar + should share a common indentStack) + - indent - boolean indicating whether block must be indented beyond the + the current level; set to False for block of left-most statements + (default=True) + + A valid block must contain at least one blockStatement. 
+ """ + def checkPeerIndent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if curCol != indentStack[-1]: + if curCol > indentStack[-1]: + raise ParseFatalException(s,l,"illegal nesting") + raise ParseException(s,l,"not a peer entry") + + def checkSubIndent(s,l,t): + curCol = col(l,s) + if curCol > indentStack[-1]: + indentStack.append( curCol ) + else: + raise ParseException(s,l,"not a subentry") + + def checkUnindent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): + raise ParseException(s,l,"not an unindent") + indentStack.pop() + + NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) + INDENT = Empty() + Empty().setParseAction(checkSubIndent) + PEER = Empty().setParseAction(checkPeerIndent) + UNDENT = Empty().setParseAction(checkUnindent) + if indent: + smExpr = Group( Optional(NL) + + FollowedBy(blockStatementExpr) + + INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) + else: + smExpr = Group( Optional(NL) + + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) + blockStatementExpr.ignore(_bslash + LineEnd()) + return smExpr + +alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") +punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") + +anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) +commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline() +_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) +replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None + +# it's easy to get these comment structures wrong - they're very common, so may as well make them available +cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") + +htmlComment = Regex(r"") +restOfLine = Regex(r".*").leaveWhitespace() +dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") +cppStyleComment = 
Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist)) + print ("tokens = " + str(tokens)) + print ("tokens.columns = " + str(tokens.columns)) + print ("tokens.tables = " + str(tokens.tables)) + print (tokens.asXML("SQL",True)) + except ParseBaseException,err: + print (teststring + "->") + print (err.line) + print (" "*(err.column-1) + "^") + print (err) + print() + + selectToken = CaselessLiteral( "select" ) + fromToken = CaselessLiteral( "from" ) + + ident = Word( alphas, alphanums + "_$" ) + columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) + columnNameList = Group( delimitedList( columnName ) )#.setName("columns") + tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) + tableNameList = Group( delimitedList( tableName ) )#.setName("tables") + simpleSQL = ( selectToken + \ + ( '*' | columnNameList ).setResultsName( "columns" ) + \ + fromToken + \ + tableNameList.setResultsName( "tables" ) ) + + test( "SELECT * from XYZZY, ABC" ) + test( "select * from SYS.XYZZY" ) + test( "Select A from Sys.dual" ) + test( "Select AA,BB,CC from Sys.dual" ) + test( "Select A, B, C from Sys.dual" ) + test( "Select A, B, C from Sys.dual" ) + test( "Xelect A, B, C from Sys.dual" ) + test( "Select A, B, C frox Sys.dual" ) + test( "Select" ) + test( "Select ^^^ frox Sys.dual" ) + test( "Select A, B, C from Sys.dual, Table2 " ) diff --git a/libmproxy/resources/bogus_template b/libmproxy/resources/bogus_template new file mode 100644 index 000000000..afa7281c0 --- /dev/null +++ b/libmproxy/resources/bogus_template @@ -0,0 +1,11 @@ +[ req ] +prompt = no +distinguished_name = req_distinguished_name + +[ req_distinguished_name ] +C = NZ +ST = none +L = none +O = none +OU = none +emailAddress = none diff --git a/libmproxy/utils.py b/libmproxy/utils.py new file mode 100644 index 000000000..82ddf8fc2 --- /dev/null +++ b/libmproxy/utils.py @@ -0,0 +1,277 @@ +# Copyright (C) 2010 Aldo Cortesi +# +# This 
program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import re, os, subprocess + +def isBin(s): + """ + Does this string have any non-ASCII characters? + """ + for i in s: + i = ord(i) + if i < 9: + return True + elif i > 13 and i < 32: + return True + elif i > 126: + return True + return False + + +def cleanBin(s): + parts = [] + for i in s: + o = ord(i) + if o > 31 and o < 127: + parts.append(i) + else: + parts.append(".") + return "".join(parts) + + +def hexdump(s): + """ + Returns a set of typles: + (offset, hex, str) + """ + parts = [] + for i in range(0, len(s), 16): + o = "%.10x"%i + part = s[i:i+16] + x = " ".join(["%.2x"%ord(i) for i in part]) + if len(part) < 16: + x += " " + x += " ".join([" " for i in range(16-len(part))]) + parts.append( + (o, x, cleanBin(part)) + ) + return parts + + +def isStringLike(anobj): + try: + # Avoid succeeding expensively if anobj is large. + anobj[:0]+'' + except: + return 0 + else: + return 1 + + +def isSequenceLike(anobj): + """ + Is anobj a non-string sequence type (list, tuple, iterator, or + similar)? Crude, but mostly effective. + """ + if not hasattr(anobj, "next"): + if isStringLike(anobj): + return 0 + try: + anobj[:0] + except: + return 0 + return 1 + + +def _caseless(s): + return s.lower() + + +class MultiDict: + """ + Simple wrapper around a dictionary to make holding multiple objects per + key easier. + + Note that this class assumes that keys are strings. 
+ + Keys have no order, but the order in which values are added to a key is + preserved. + """ + # This ridiculous bit of subterfuge is needed to prevent the class from + # treating this as a bound method. + _helper = (str,) + def __init__(self): + self._d = dict() + + def copy(self): + m = self.__class__() + m._d = self._d.copy() + return m + + def clear(self): + return self._d.clear() + + def get(self, key, d=None): + key = self._helper[0](key) + return self._d.get(key, d) + + def __eq__(self, other): + return dict(self) == dict(other) + + def __delitem__(self, key): + self._d.__delitem__(key) + + def __getitem__(self, key): + key = self._helper[0](key) + return self._d.__getitem__(key) + + def __setitem__(self, key, value): + if not isSequenceLike(value): + raise ValueError, "Cannot insert non-sequence." + key = self._helper[0](key) + return self._d.__setitem__(key, value) + + def has_key(self, key): + key = self._helper[0](key) + return self._d.has_key(key) + + def keys(self): + return self._d.keys() + + def extend(self, key, value): + if not self.has_key(key): + self[key] = [] + self[key].extend(value) + + def append(self, key, value): + self.extend(key, [value]) + + def itemPairs(self): + """ + Yield all possible pairs of items. + """ + for i in self.keys(): + for j in self[i]: + yield (i, j) + + +class Headers(MultiDict): + """ + A dictionary-like class for keeping track of HTTP headers. + + It is case insensitive, and __repr__ formats the headers correcty for + output to the server. + """ + _helper = (_caseless,) + def __repr__(self): + """ + Returns a string containing a formatted header string. + """ + headerElements = [] + for key in self.keys(): + for val in self[key]: + headerElements.append(key + ": " + val) + headerElements.append("") + return "\r\n".join(headerElements) + + def match_re(self, expr): + """ + Match the regular expression against each header (key, value) pair. 
+ """ + for k, v in self.itemPairs(): + s = "%s: %s"%(k, v) + if re.search(expr, s): + return True + return False + + def read(self, fp): + """ + Read a set of headers from a file pointer. Stop once a blank line + is reached. + """ + name = '' + while 1: + line = fp.readline() + if not line or line == '\r\n' or line == '\n': + break + if line[0] in ' \t': + # continued header + self[name][-1] = self[name][-1] + '\r\n ' + line.strip() + else: + i = line.find(':') + # We're being liberal in what we accept, here. + if i > 0: + name = line[:i] + value = line[i+1:].strip() + if self.has_key(name): + # merge value + self.append(name, value) + else: + self[name] = [value] + + +def pretty_size(size): + suffixes = [ + ("B", 2**10), + ("kB", 2**20), + ("M", 2**30), + ] + for suf, lim in suffixes: + if size >= lim: + continue + else: + x = round(size/float(lim/2**10), 2) + if x == int(x): + x = int(x) + return str(x) + suf + + +class Data: + def __init__(self, name): + m = __import__(name) + dirname, _ = os.path.split(m.__file__) + self.dirname = os.path.abspath(dirname) + + def path(self, path): + """ + Returns a path to the package data housed at 'path' under this + module.Path can be a path to a file, or to a directory. + + This function will raise ValueError if the path does not exist. 
+ """ + fullpath = os.path.join(self.dirname, path) + if not os.path.exists(fullpath): + raise ValueError, "dataPath: %s does not exist."%fullpath + return fullpath +data = Data(__name__) + + +def make_bogus_cert(path): + # Generates a bogus certificate like so: + # openssl req -config template -x509 -nodes -days 9999 -newkey rsa:1024 \ + # -keyout cert.pem -out cert.pem + + d = os.path.dirname(path) + if not os.path.exists(d): + os.makedirs(d) + + cmd = [ + "openssl", + "req", + "-config", data.path("resources/bogus_template"), + "-x509" , + "-nodes", + "-days", "9999", + "-newkey", "rsa:1024", + "-keyout", path, + "-out", path, + ] + subprocess.call( + cmd, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE + ) + diff --git a/mitmproxy b/mitmproxy new file mode 100755 index 000000000..692d7bc6b --- /dev/null +++ b/mitmproxy @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 Aldo Cortesi +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import sys, os.path +from libmproxy import proxy, controller, console, utils +from optparse import OptionParser, OptionGroup + + +if __name__ == '__main__': + parser = OptionParser( + usage = "%prog [options] output", + version="%prog 0.1", + ) + parser.add_option( + "-d", "--dump", action="store_true", + dest="dump", default=False, + help = "Just dump data to screen." 
+ ) + parser.add_option( + "-c", "--cert", action="store", + type = "str", dest="cert", default="~/.mitmproxy/cert.pem", + help = "SSL certificate file." + ) + parser.add_option( + "-p", "--port", action="store", + type = "int", dest="port", default=8080, + help = "Port." + ) + parser.add_option("-q", "--quiet", + action="store_true", dest="quiet", + help="Quiet.") + parser.add_option("-v", "--verbose", + action="count", dest="verbose", default=1, + help="Increase verbosity. Can be passed multiple times.") + options, args = parser.parse_args() + + if options.quiet: + options.verbose = 0 + + certpath = os.path.expanduser(options.cert) + + if not os.path.exists(certpath): + print >> sys.stderr, "Creating bogus certificate at %s"%options.cert + utils.make_bogus_cert(certpath) + + proxy.config = proxy.Config( + certpath + ) + server = proxy.ProxyServer(options.port) + if options.dump: + m = controller.DumpMaster(server, options.verbose) + else: + m = console.ConsoleMaster(server, options.verbose) + if options.verbose > 0: + print >> sys.stderr, "Running on port %s"%options.port + m.run() diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..45728608f --- /dev/null +++ b/setup.py @@ -0,0 +1,97 @@ +from distutils.core import setup +import fnmatch, os.path + +def _fnmatch(name, patternList): + for i in patternList: + if fnmatch.fnmatch(name, i): + return True + return False + + +def _splitAll(path): + parts = [] + h = path + while 1: + if not h: + break + h, t = os.path.split(h) + parts.append(t) + parts.reverse() + return parts + + +def findPackages(path, dataExclude=[]): + """ + Recursively find all packages and data directories rooted at path. Note + that only data _directories_ and their contents are returned - + non-Python files at module scope are not, and should be manually + included. + + dataExclude is a list of fnmatch-compatible expressions for files and + directories that should not be included in pakcage_data. 
+ + Returns a (packages, package_data) tuple, ready to be passed to the + corresponding distutils.core.setup arguments. + """ + packages = [] + datadirs = [] + for root, dirs, files in os.walk(path, topdown=True): + if "__init__.py" in files: + p = _splitAll(root) + packages.append(".".join(p)) + else: + dirs[:] = [] + if packages: + datadirs.append(root) + + # Now we recurse into the data directories + package_data = {} + for i in datadirs: + if not _fnmatch(i, dataExclude): + parts = _splitAll(i) + module = ".".join(parts[:-1]) + acc = package_data.get(module, []) + for root, dirs, files in os.walk(i, topdown=True): + sub = os.path.join(*_splitAll(root)[1:]) + if not _fnmatch(sub, dataExclude): + for fname in files: + path = os.path.join(sub, fname) + if not _fnmatch(path, dataExclude): + acc.append(path) + else: + dirs[:] = [] + package_data[module] = acc + return packages, package_data + + + + +long_description = """ +A man-in-the-middle intercepting proxy written in Python. + +Features +======== + + * Intercept HTTP and HTTPS traffic. + * Modify, manipulate and replay requests and responses on the fly. +""" +packages, package_data = findPackages("libmproxy") +print packages, package_data +version = "0.1" +setup( + name = "mitmproxy", + version = version, + description = "An interactive intercepting proxy server.", + long_description = long_description, + author = "Aldo Cortesi", + author_email = "aldo@corte.si", + url = "http://corte.si/software/mitmproxy", + packages = packages, + package_data = package_data, + scripts = ["mitmproxy"], + classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Operating System :: OS Independent", + ] +) diff --git a/test/.pry b/test/.pry new file mode 100644 index 000000000..0e4b2e23c --- /dev/null +++ b/test/.pry @@ -0,0 +1,5 @@ +base = .. +coverage = ../libmproxy +exclude = ../libmproxy/pyparsing.py + . 
+ diff --git a/test/data/serverkey.pem b/test/data/serverkey.pem new file mode 100644 index 000000000..289bfa71f --- /dev/null +++ b/test/data/serverkey.pem @@ -0,0 +1,32 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICXQIBAAKBgQC+N+9bv1YC0GKbGdv2wMuuWTGSNwE/Hq5IIxYN1eITsvbD1GgB +69x++XJd6KTIthnta0KCpCAtbaYbCkhUfxCVv2bP+iQt2AjwMOZlgRZ+RGJ25dBu +AjAxQmqDJcAdS6MoRHWziomnUNfNogVrfqjpvJor+1iRnrj2q00ab9WYCwIDAQAB +AoGBAIM7V9l2UcKzPbQ/zO+Z52urgXWcmTGQ2zBNdIOrEcQBbhmAyxi4PnEja3G6 +dSU77PtNSp+S19g/k5+IIoqY9zkGigdaPhRVRKJgBTAzFzMz+WHpQIffDojFKCnL +gyDnzMRJY8+cnsCqbHRY4hqFiCr8Rq9sCdlynAytdtrnxzqhAkEA9bha6MO+L0JA +6IEEbVY1vtaUO9Xg5DUDjRxQcfniSJACb/2IvF0tvxAnG7I/S8AavCXqtlDPtYkI +WOxY5Sd62QJBAMYtKUxGka4XxwCyBK8EUNaN8m9C++mpjoHD1kFri9B1bXm91nCO +iGWqtqdarwyEc/pAHw5UGzVyBXticPIcs4MCQQCcPvsHsZhYoq91aLyw7bXFQNsH +ZUvYsOEuNIfuwa+i5ne2UKhG5pU1PgcwNFrNRz140D98aMx7KcS2DqvEIyOZAkBF +6Yi4L+0Uza6WwDaGx679AfaU6byVIgv0G3JqgdZBJCwK1r3f12im9SKax5MZh2Ci +2Bwcoe83W5IzhPbzcsyhAkBo8O2U2vig5PQWQ0BUKJrCGHLq//D/ttdLVtmc6eWc +zqssCF3Unkk3bOq35swSKeAx8WotPPVsALWr87N2hCB+ +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICsDCCAhmgAwIBAgIJANwogM9sqMHLMA0GCSqGSIb3DQEBBQUAMEUxCzAJBgNV +BAYTAkFVMRMwEQYDVQQIEwpTb21lLVN0YXRlMSEwHwYDVQQKExhJbnRlcm5ldCBX +aWRnaXRzIFB0eSBMdGQwHhcNMTAwMTMxMDEzOTEzWhcNMTEwMTMxMDEzOTEzWjBF +MQswCQYDVQQGEwJBVTETMBEGA1UECBMKU29tZS1TdGF0ZTEhMB8GA1UEChMYSW50 +ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKB +gQC+N+9bv1YC0GKbGdv2wMuuWTGSNwE/Hq5IIxYN1eITsvbD1GgB69x++XJd6KTI +thnta0KCpCAtbaYbCkhUfxCVv2bP+iQt2AjwMOZlgRZ+RGJ25dBuAjAxQmqDJcAd +S6MoRHWziomnUNfNogVrfqjpvJor+1iRnrj2q00ab9WYCwIDAQABo4GnMIGkMB0G +A1UdDgQWBBTTnBZyw7ZZsb8+/6gvZFIHhVgtDzB1BgNVHSMEbjBsgBTTnBZyw7ZZ +sb8+/6gvZFIHhVgtD6FJpEcwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgTClNvbWUt +U3RhdGUxITAfBgNVBAoTGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZIIJANwogM9s +qMHLMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEApz428aOar0EBuAib +I+liefRlK4I3MQQxq3tOeB1dgAIo0ivKtdVJGi1kPg8EO0KMvFfn6IRtssUmFgCp 
+JBD+HoDzFxwI1bLMVni+g7OzaNSwL3nQ94lZUdpWMYDxqY4bLUv3goX1TlN9lmpG +8FiBLYUC0RNTCCRDFGfDr/wUT/M= +-----END CERTIFICATE----- diff --git a/test/data/testkey.pem b/test/data/testkey.pem new file mode 100644 index 000000000..af8d9d8f6 --- /dev/null +++ b/test/data/testkey.pem @@ -0,0 +1,32 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICXQIBAAKBgQC+6rG6A/BGD0dI+mh2FZIqQZn82z/pGs4f3pyxbHb+ROxjjQOr +fDCw2jc11XDxK7CXpDQAnkO6au/sQ5t50vSZ+PGhFD+t558VV2ausB5OYZsR7RRx +gl1jsxWdde3EHGjxSK+aXRgFpVrZzPLSy6dl8tMoqUMWIBi0u1WTbmyYjwIDAQAB +AoGBAKyqhmK9/Sjf2JDgKGnjyHX/Ls3JXVvtqk6Yfw7YEiaVH1ZJyu/lOgQ414YQ +rDzyTpxXHdERUh/fZ24/FvZvHFgy5gWEQjQPpprIxvqCLKJhX73L2+TnXmfYDApb +J7V/JfnTeOaK9LTpHsofB98A1s9DWX/ccOgKTtZIYMjYpdoBAkEA9hLvtixbO2A2 +ZgDcA9ftVX2WwdpRH+mYXl1G60Fem5nlO3Rl3FDoafRvSQNZiqyOlObvKbbYh/S2 +L7ihEMMNYQJBAMaeLnAc9jO/z4ApTqSBGUpM9b7ul16aSgq56saUI0VULIZcXeo3 +3BwdL2fEOOnzjNy6NpH2BW63h/+2t7lV++8CQQDK+S+1Sr0uKtx0Iv1YRkHEJMW3 +vQbxldNS8wnOf6s0GisVcZubsTkkPLWWuiaf1ln9xMc9106gRmAI2PgyRVHBAkA6 +iI+C9uYP5i1Oxd2pWWqMnRWnSUVO2gWMF7J7B1lFq0Lb7gi3Z/L0Th2UZR2oxN/0 +hORkK676LBhmYgDPG+n9AkAJOnPIFQVAEBAO9bAxFrje8z6GRt332IlgxuiTeDE3 +EAlH9tmZma4Tri4sWnhJwCsxl+5hWamI8NL4EIeXRvPw +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICsDCCAhmgAwIBAgIJAI7G7a/d5YwEMA0GCSqGSIb3DQEBBQUAMEUxCzAJBgNV +BAYTAkFVMRMwEQYDVQQIEwpTb21lLVN0YXRlMSEwHwYDVQQKExhJbnRlcm5ldCBX +aWRnaXRzIFB0eSBMdGQwHhcNMTAwMjAyMDM0MTExWhcNMTEwMjAyMDM0MTExWjBF +MQswCQYDVQQGEwJBVTETMBEGA1UECBMKU29tZS1TdGF0ZTEhMB8GA1UEChMYSW50 +ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKB +gQC+6rG6A/BGD0dI+mh2FZIqQZn82z/pGs4f3pyxbHb+ROxjjQOrfDCw2jc11XDx +K7CXpDQAnkO6au/sQ5t50vSZ+PGhFD+t558VV2ausB5OYZsR7RRxgl1jsxWdde3E +HGjxSK+aXRgFpVrZzPLSy6dl8tMoqUMWIBi0u1WTbmyYjwIDAQABo4GnMIGkMB0G +A1UdDgQWBBS+MFJTsriCPNYsj8/4f+PympPEkzB1BgNVHSMEbjBsgBS+MFJTsriC +PNYsj8/4f+PympPEk6FJpEcwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgTClNvbWUt +U3RhdGUxITAfBgNVBAoTGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZIIJAI7G7a/d 
+5YwEMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEAlpan/QX2fpXVRihV +lQic2DktF4xd5unrZnFC8X8ScNX1ClU+AO79ejaobt4YGjeVYs0iQQsUL2E0G43c +mOXfsq1b970Ep6xRS76EmZ+tTdFBd86tFTIhZJrOi67gs+twj5V2elyp3tQpg2ze +G/jwDQS8V1X9CbfqBQriL7x5Tk4= +-----END CERTIFICATE----- diff --git a/test/handler.py b/test/handler.py new file mode 100644 index 000000000..5803b4d1b --- /dev/null +++ b/test/handler.py @@ -0,0 +1,25 @@ +import socket +from BaseHTTPServer import BaseHTTPRequestHandler + + +class TestRequestHandler(BaseHTTPRequestHandler): + default_request_version = "HTTP/1.1" + def setup(self): + self.connection = self.request + self.rfile = socket._fileobject(self.request, "rb", self.rbufsize) + self.wfile = socket._fileobject(self.request, "wb", self.wbufsize) + + def log_message(self, *args, **kwargs): + pass + + def do_GET(self): + data = "data: %s\npath: %s\n"%(self.headers, self.path) + self.send_response(200) + self.send_header("proxtest", "testing") + self.send_header("Content-type", "text-html") + self.send_header("Content-length", len(data)) + self.end_headers() + self.wfile.write(data) + + + diff --git a/test/serv.py b/test/serv.py new file mode 100644 index 000000000..9e43c08f2 --- /dev/null +++ b/test/serv.py @@ -0,0 +1,10 @@ +import socket, os, cStringIO, tempfile +from SocketServer import BaseServer +from BaseHTTPServer import HTTPServer +import handler + +def make(port): + server_address = ('', port) + return HTTPServer(server_address, handler.TestRequestHandler) + + diff --git a/test/sslserv.py b/test/sslserv.py new file mode 100644 index 000000000..5153d0dae --- /dev/null +++ b/test/sslserv.py @@ -0,0 +1,22 @@ +import socket, os, cStringIO, tempfile +from SocketServer import BaseServer +from BaseHTTPServer import HTTPServer +import ssl +import handler + + +class SecureHTTPServer(HTTPServer): + def __init__(self, server_address, HandlerClass): + BaseServer.__init__(self, server_address, HandlerClass) + self.socket = ssl.wrap_socket( + 
socket.socket(self.address_family, self.socket_type), + keyfile = "data/serverkey.pem", + certfile = "data/serverkey.pem" + ) + self.server_bind() + self.server_activate() + + +def make(port): + server_address = ('', port) + return SecureHTTPServer(server_address, handler.TestRequestHandler) diff --git a/test/test_console.py b/test/test_console.py new file mode 100644 index 000000000..50780aa57 --- /dev/null +++ b/test/test_console.py @@ -0,0 +1,269 @@ +from libmproxy import console, proxy, utils, filt +import libpry + +def treq(conn=None): + if not conn: + conn = proxy.BrowserConnection("address", 22) + headers = utils.Headers() + headers["header"] = ["qvalue"] + return proxy.Request(conn, "host", 80, "http", "GET", "/path", headers, "content") + + +def tresp(req=None): + if not req: + req = treq() + headers = utils.Headers() + headers["header_response"] = ["svalue"] + return proxy.Response(req, 200, "HTTP/1.1", "message", headers, "content_response") + + +def tflow(): + bc = proxy.BrowserConnection("address", 22) + return console.Flow(bc) + + +class uState(libpry.AutoTree): + def test_backup(self): + bc = proxy.BrowserConnection("address", 22) + c = console.State() + f = console.Flow(bc) + c.add_browserconnect(f) + + f.backup() + c.revert(f) + + def test_flow(self): + """ + normal flow: + + connect -> request -> response + """ + bc = proxy.BrowserConnection("address", 22) + c = console.State() + f = console.Flow(bc) + c.add_browserconnect(f) + assert c.lookup(bc) + assert c.get_focus() == (f, 0) + + req = treq(bc) + assert c.add_request(req) + assert len(c.flow_list) == 1 + assert c.lookup(req) + + newreq = treq() + assert not c.add_request(newreq) + assert not c.lookup(newreq) + + resp = tresp(req) + assert c.add_response(resp) + assert len(c.flow_list) == 1 + assert f.waiting == False + assert c.lookup(resp) + + newresp = tresp() + assert not c.add_response(newresp) + assert not c.lookup(newresp) + + def test_err(self): + bc = proxy.BrowserConnection("address", 
22) + c = console.State() + f = console.Flow(bc) + c.add_browserconnect(f) + e = proxy.Error(bc, "message") + assert c.add_error(e) + + e = proxy.Error(proxy.BrowserConnection("address", 22), "message") + assert not c.add_error(e) + + def test_view(self): + c = console.State() + + f = tflow() + c.add_browserconnect(f) + assert len(c.view) == 1 + c.set_limit(filt.parse("~q")) + assert len(c.view) == 0 + c.set_limit(None) + + + f = tflow() + req = treq(f.connection) + c.add_browserconnect(f) + c.add_request(req) + assert len(c.view) == 2 + c.set_limit(filt.parse("~q")) + assert len(c.view) == 1 + c.set_limit(filt.parse("~s")) + assert len(c.view) == 0 + + def test_focus(self): + """ + normal flow: + + connect -> request -> response + """ + c = console.State() + + bc = proxy.BrowserConnection("address", 22) + f = console.Flow(bc) + c.add_browserconnect(f) + assert c.get_focus() == (f, 0) + assert c.get_from_pos(0) == (f, 0) + assert c.get_from_pos(1) == (None, None) + assert c.get_next(0) == (None, None) + + bc2 = proxy.BrowserConnection("address", 22) + f2 = console.Flow(bc2) + c.add_browserconnect(f2) + assert c.get_focus() == (f, 1) + assert c.get_next(0) == (f, 1) + assert c.get_prev(1) == (f2, 0) + assert c.get_next(1) == (None, None) + + c.set_focus(0) + assert c.get_focus() == (f2, 0) + c.set_focus(-1) + assert c.get_focus() == (f2, 0) + + c.delete_flow(f2) + assert c.get_focus() == (f, 0) + c.delete_flow(f) + assert c.get_focus() == (None, None) + + def _add_request(self, state): + f = tflow() + state.add_browserconnect(f) + q = treq(f.connection) + state.add_request(q) + return f + + def _add_response(self, state): + f = self._add_request(state) + r = tresp(f.request) + state.add_response(r) + + def test_focus_view(self): + c = console.State() + self._add_request(c) + self._add_response(c) + self._add_request(c) + self._add_response(c) + self._add_request(c) + self._add_response(c) + c.set_limit(filt.parse("~q")) + assert len(c.view) == 3 + assert c.focus == 
2 + + def test_delete_last(self): + c = console.State() + f1 = tflow() + f2 = tflow() + c.add_browserconnect(f1) + c.add_browserconnect(f2) + c.set_focus(1) + c.delete_flow(f1) + assert c.focus == 0 + + def test_kill_flow(self): + c = console.State() + f = tflow() + c.add_browserconnect(f) + c.kill_flow(f) + assert not c.flow_list + + def test_clear(self): + c = console.State() + f = tflow() + c.add_browserconnect(f) + f.intercepting = True + + c.clear() + assert len(c.flow_list) == 1 + f.intercepting = False + c.clear() + assert len(c.flow_list) == 0 + + +class uFlow(libpry.AutoTree): + def test_match(self): + f = tflow() + f.response = tresp() + f.request = f.response.request + assert not f.match(filt.parse("~b test")) + + def test_backup(self): + f = tflow() + f.backup() + f.revert() + + def test_simple(self): + f = tflow() + assert f.get_text() + + f.request = treq() + assert f.get_text() + + f.response = tresp() + f.response.headers["content-type"] = ["text/html"] + assert f.get_text() + f.response.code = 404 + assert f.get_text() + + f.focus = True + assert f.get_text() + + f.connection = console.ReplayConnection() + assert f.get_text() + + f.response = None + assert f.get_text() + + f.error = proxy.Error(200, "test") + assert f.get_text() + + def test_kill(self): + f = tflow() + f.request = treq() + f.intercept() + assert not f.request.acked + f.kill() + assert f.request.acked + f.intercept() + f.response = tresp() + f.request = f.response.request + f.request.ack() + assert not f.response.acked + f.kill() + assert f.response.acked + + def test_accept_intercept(self): + f = tflow() + f.request = treq() + f.intercept() + assert not f.request.acked + f.accept_intercept() + assert f.request.acked + f.response = tresp() + f.request = f.response.request + f.intercept() + f.request.ack() + assert not f.response.acked + f.accept_intercept() + assert f.response.acked + + +class uformat_keyvals(libpry.AutoTree): + def test_simple(self): + assert 
console.format_keyvals( + [ + ("aa", "bb"), + ("cc", "dd"), + ] + ) + + +tests = [ + uFlow(), + uformat_keyvals(), + uState() +] diff --git a/test/test_filt.py b/test/test_filt.py new file mode 100644 index 000000000..3cf0f6cdf --- /dev/null +++ b/test/test_filt.py @@ -0,0 +1,220 @@ +import cStringIO +from libmproxy import filt, proxy, utils +import libpry + + +class uParsing(libpry.AutoTree): + def _dump(self, x): + c = cStringIO.StringIO() + x.dump(fp=c) + assert c.getvalue() + + def test_simple(self): + assert not filt.parse("~b") + assert filt.parse("~q") + assert filt.parse("~c 10") + assert filt.parse("~u foobar") + assert filt.parse("~q ~c 10") + p = filt.parse("~q ~c 10") + self._dump(p) + assert len(p.lst) == 2 + + def test_naked_url(self): + #a = filt.parse("foobar") + #assert a.lst[0].expr == "foobar" + + a = filt.parse("foobar ~h rex") + assert a.lst[0].expr == "foobar" + assert a.lst[1].expr == "rex" + self._dump(a) + + def test_quoting(self): + a = filt.parse("~u 'foo ~u bar' ~u voing") + assert a.lst[0].expr == "foo ~u bar" + assert a.lst[1].expr == "voing" + self._dump(a) + + a = filt.parse("~u foobar") + assert a.expr == "foobar" + + a = filt.parse(r"~u 'foobar\"\''") + assert a.expr == "foobar\"'" + + a = filt.parse(r'~u "foo \'bar"') + assert a.expr == "foo 'bar" + + def test_nesting(self): + a = filt.parse("(~u foobar & ~h voing)") + assert a.lst[0].expr == "foobar" + self._dump(a) + + def test_not(self): + a = filt.parse("!~h test") + assert a.itm.expr == "test" + a = filt.parse("!(~u test & ~h bar)") + assert a.itm.lst[0].expr == "test" + self._dump(a) + + def test_binaryops(self): + a = filt.parse("~u foobar | ~h voing") + isinstance(a, filt.FOr) + self._dump(a) + + a = filt.parse("~u foobar & ~h voing") + isinstance(a, filt.FAnd) + self._dump(a) + + def test_wideops(self): + a = filt.parse("~hq 'header: qvalue'") + assert isinstance(a, filt.FHeadRequest) + self._dump(a) + + +class uMatching(libpry.AutoTree): + def req(self): + conn = 
proxy.BrowserConnection("one", 2222) + headers = utils.Headers() + headers["header"] = ["qvalue"] + return proxy.Request( + conn, + "host", + 80, + "http", + "GET", + "/path", + headers, + "content_request" + ) + + def resp(self): + q = self.req() + headers = utils.Headers() + headers["header_response"] = ["svalue"] + return proxy.Response( + q, + 200, + "HTTP/1.1", + "message", + headers, + "content_response" + ) + + def q(self, q, o): + return filt.parse(q)(o) + + def test_fcontenttype(self): + q = self.req() + s = self.resp() + assert not self.q("~t content", q) + assert not self.q("~t content", s) + + q.headers["content-type"] = ["text/json"] + assert self.q("~t json", q) + assert self.q("~tq json", q) + assert not self.q("~ts json", q) + + s.headers["content-type"] = ["text/json"] + assert self.q("~t json", s) + + del s.headers["content-type"] + s.request.headers["content-type"] = ["text/json"] + assert self.q("~t json", s) + assert self.q("~tq json", s) + assert not self.q("~ts json", s) + + def test_freq_fresp(self): + q = self.req() + s = self.resp() + + assert self.q("~q", q) + assert not self.q("~q", s) + + assert not self.q("~s", q) + assert self.q("~s", s) + + def test_head(self): + q = self.req() + s = self.resp() + assert not self.q("~h nonexistent", q) + assert self.q("~h qvalue", q) + assert self.q("~h header", q) + assert self.q("~h 'header: qvalue'", q) + + assert self.q("~h 'header: qvalue'", s) + assert self.q("~h 'header_response: svalue'", s) + + assert self.q("~hq 'header: qvalue'", s) + assert not self.q("~hq 'header_response: svalue'", s) + + assert self.q("~hq 'header: qvalue'", q) + assert not self.q("~hq 'header_request: svalue'", q) + + assert not self.q("~hs 'header: qvalue'", s) + assert self.q("~hs 'header_response: svalue'", s) + assert not self.q("~hs 'header: qvalue'", q) + + def test_body(self): + q = self.req() + s = self.resp() + assert not self.q("~b nonexistent", q) + assert self.q("~b content", q) + assert self.q("~b 
response", s) + assert self.q("~b content_request", s) + + assert self.q("~bq content", q) + assert self.q("~bq content", s) + assert not self.q("~bq response", q) + assert not self.q("~bq response", s) + + assert not self.q("~bs content", q) + assert self.q("~bs content", s) + assert not self.q("~bs nomatch", s) + assert not self.q("~bs response", q) + assert self.q("~bs response", s) + + def test_url(self): + q = self.req() + s = self.resp() + assert self.q("~u host", q) + assert self.q("~u host/path", q) + assert not self.q("~u moo/path", q) + + assert self.q("~u host", s) + assert self.q("~u host/path", s) + assert not self.q("~u moo/path", s) + + def test_code(self): + q = self.req() + s = self.resp() + assert not self.q("~c 200", q) + assert self.q("~c 200", s) + assert not self.q("~c 201", s) + + def test_and(self): + s = self.resp() + assert self.q("~c 200 & ~h head", s) + assert not self.q("~c 200 & ~h nohead", s) + assert self.q("(~c 200 & ~h head) & ~b content", s) + assert not self.q("(~c 200 & ~h head) & ~b nonexistent", s) + assert not self.q("(~c 200 & ~h nohead) & ~b content", s) + + def test_or(self): + s = self.resp() + assert self.q("~c 200 | ~h nohead", s) + assert self.q("~c 201 | ~h head", s) + assert not self.q("~c 201 | ~h nohead", s) + assert self.q("(~c 201 | ~h nohead) | ~s", s) + assert not self.q("(~c 201 | ~h nohead) | ~q", s) + + def test_not(self): + s = self.resp() + assert not self.q("! ~c 200", s) + assert self.q("! ~c 201", s) + assert self.q("!~c 201 !~c 202", s) + assert not self.q("!~c 201 !~c 200", s) + + +tests = [ + uMatching(), + uParsing() +] diff --git a/test/test_proxy.py b/test/test_proxy.py new file mode 100644 index 000000000..90cfbbfb4 --- /dev/null +++ b/test/test_proxy.py @@ -0,0 +1,259 @@ +import threading, urllib, Queue, urllib2, cStringIO +import libpry +import serv, sslserv +from libmproxy import proxy, controller, utils +import random + +# Yes, the random ports are horrible. 
During development, sockets are often not +# properly closed during error conditions, which means you have to wait until +# you can re-bind to the same port. This is a pain in the ass, so we just pick +# a random port and keep moving. +PROXL_PORT = random.randint(10000, 20000) +HTTP_PORT = random.randint(20000, 30000) +HTTPS_PORT = random.randint(30000, 40000) + + +class TestMaster(controller.Master): + def __init__(self, port, testq): + serv = proxy.ProxyServer(port) + controller.Master.__init__(self, serv) + self.testq = testq + self.log = [] + + def clear(self): + self.log = [] + + def handle(self, m): + self.log.append(m) + m.ack() + + +class ProxyThread(threading.Thread): + def __init__(self, port, testq): + self.tmaster = TestMaster(port, testq) + threading.Thread.__init__(self) + + def run(self): + self.tmaster.run() + + def shutdown(self): + self.tmaster.shutdown() + + +class ServerThread(threading.Thread): + def __init__(self, server): + self.server = server + threading.Thread.__init__(self) + + def run(self): + self.server.serve_forever() + + def shutdown(self): + self.server.shutdown() + + +class _TestServers(libpry.TestContainer): + def setUpAll(self): + proxy.config = proxy.Config("data/testkey.pem") + self.tqueue = Queue.Queue() + # We don't make any concurrent requests, so we can access + # the attributes on this object safely. + self.proxthread = ProxyThread(PROXL_PORT, self.tqueue) + self.threads = [ + ServerThread(serv.make(HTTP_PORT)), + ServerThread(sslserv.make(HTTPS_PORT)), + self.proxthread + ] + for i in self.threads: + i.start() + + def setUp(self): + self.proxthread.tmaster.clear() + + def tearDownAll(self): + for i in self.threads: + i.shutdown() + + +class _ProxTests(libpry.AutoTree): + def log(self): + pthread = self.findAttr("proxthread") + return pthread.tmaster.log + + +class uSanity(_ProxTests): + def test_http(self): + """ + Just check that the HTTP server is running. 
+ """ + f = urllib.urlopen("http://127.0.0.1:%s"%HTTP_PORT) + assert f.read() + + def test_https(self): + """ + Just check that the HTTPS server is running. + """ + f = urllib.urlopen("https://127.0.0.1:%s"%HTTPS_PORT) + assert f.read() + + +class uProxy(_ProxTests): + HOST = "127.0.0.1" + def _get(self, host=HOST): + r = urllib2.Request("http://%s:%s"%(host, HTTP_PORT)) + r.set_proxy("127.0.0.1:%s"%PROXL_PORT, "http") + return urllib2.urlopen(r) + + def _sget(self, host=HOST): + proxy_support = urllib2.ProxyHandler( + {"https" : "https://127.0.0.1:%s"%PROXL_PORT} + ) + opener = urllib2.build_opener(proxy_support) + r = urllib2.Request("https://%s:%s"%(host, HTTPS_PORT)) + return opener.open(r) + + def test_http(self): + f = self._get() + assert f.code == 200 + assert f.read() + f.close() + + l = self.log() + assert l[0].address + assert l[1].headers.has_key("host") + assert l[2].code == 200 + + def test_https(self): + f = self._sget() + assert f.code == 200 + assert f.read() + f.close() + + l = self.log() + assert l[0].address + assert l[1].headers.has_key("host") + assert l[2].code == 200 + + # Disable these two for now: they take a long time. + def _test_http_nonexistent(self): + f = self._get("nonexistent") + assert f.code == 200 + assert "Error" in f.read() + + def _test_https_nonexistent(self): + f = self._sget("nonexistent") + assert f.code == 200 + assert "Error" in f.read() + + + +class u_parse_proxy_request(libpry.AutoTree): + def test_simple(self): + libpry.raises(proxy.ProxyError, proxy.parse_proxy_request, "") + + u = "GET ... 
HTTP/1.1" + libpry.raises("invalid url", proxy.parse_proxy_request, u) + + u = "MORK / HTTP/1.1" + libpry.raises("unknown request method", proxy.parse_proxy_request, u) + + u = "GET http://foo.com:8888/test HTTP/1.1" + m, s, h, po, pa = proxy.parse_proxy_request(u) + assert m == "GET" + assert s == "http" + assert h == "foo.com" + assert po == 8888 + assert pa == "/test" + + def test_connect(self): + u = "CONNECT host.com:443 HTTP/1.0" + expected = ('CONNECT', None, 'host.com', 443, None) + ret = proxy.parse_proxy_request(u) + assert expected == ret + + def test_inner(self): + u = "GET / HTTP/1.1" + assert proxy.parse_proxy_request(u) == ('GET', None, None, None, '/') + + +class u_parse_url(libpry.AutoTree): + def test_simple(self): + assert not proxy.parse_url("") + + u = "http://foo.com:8888/test" + s, h, po, pa = proxy.parse_url(u) + assert s == "http" + assert h == "foo.com" + assert po == 8888 + assert pa == "/test" + + s, h, po, pa = proxy.parse_url("http://foo/bar") + assert s == "http" + assert h == "foo" + assert po == 80 + assert pa == "/bar" + + s, h, po, pa = proxy.parse_url("http://foo") + assert pa == "/" + + +class uConfig(libpry.AutoTree): + def test_pem(self): + c = proxy.Config(pemfile="data/testkey.pem") + assert c.pemfile + + +class uFileLike(libpry.AutoTree): + def test_wrap(self): + s = cStringIO.StringIO("foobar\nfoobar") + s = proxy.FileLike(s) + s.flush() + assert s.readline() == "foobar\n" + assert s.readline() == "foobar" + + +class uRequest(libpry.AutoTree): + def test_simple(self): + h = utils.Headers() + h["test"] = ["test"] + c = proxy.BrowserConnection("addr", 2222) + r = proxy.Request(c, "host", 22, "https", "GET", "/", h, "content") + u = r.url() + assert r.set_url(u) + assert not r.set_url("") + assert r.url() == u + assert r.short() + assert r.assemble() + + +class uResponse(libpry.AutoTree): + def test_simple(self): + h = utils.Headers() + h["test"] = ["test"] + c = proxy.BrowserConnection("addr", 2222) + req = proxy.Request(c, 
"host", 22, "https", "GET", "/", h, "content") + resp = proxy.Response(req, 200, "HTTP", "msg", h.copy(), "content") + assert resp.short() + assert resp.assemble() + + +class uProxyError(libpry.AutoTree): + def test_simple(self): + p = proxy.ProxyError(111, "msg") + assert repr(p) + + + +tests = [ + uProxyError(), + uRequest(), + uResponse(), + uFileLike(), + uConfig(), + u_parse_proxy_request(), + u_parse_url(), + _TestServers(), [ + uSanity(), + uProxy(), + ] +] diff --git a/test/test_utils.py b/test/test_utils.py new file mode 100644 index 000000000..8a4da968b --- /dev/null +++ b/test/test_utils.py @@ -0,0 +1,221 @@ +import textwrap, cStringIO, os +import libpry +from libmproxy import utils + + +class uisBin(libpry.AutoTree): + def test_simple(self): + assert not utils.isBin("testing\n\r") + assert utils.isBin("testing\x01") + assert utils.isBin("testing\x0e") + assert utils.isBin("testing\x7f") + + +class uhexdump(libpry.AutoTree): + def test_simple(self): + assert utils.hexdump("one\0"*10) + + +class upretty_size(libpry.AutoTree): + def test_simple(self): + assert utils.pretty_size(100) == "100B" + assert utils.pretty_size(1024) == "1kB" + assert utils.pretty_size(1024 + (1024/2)) == "1.5kB" + assert utils.pretty_size(1024*1024) == "1M" + + +class uData(libpry.AutoTree): + def test_nonexistent(self): + libpry.raises("does not exist", utils.data.path, "nonexistent") + + +class uMultiDict(libpry.AutoTree): + def setUp(self): + self.md = utils.MultiDict() + + def test_setget(self): + assert not self.md.has_key("foo") + self.md.append("foo", 1) + assert self.md["foo"] == [1] + assert self.md.has_key("foo") + + def test_del(self): + self.md.append("foo", 1) + del self.md["foo"] + assert not self.md.has_key("foo") + + def test_extend(self): + self.md.append("foo", 1) + self.md.extend("foo", [2, 3]) + assert self.md["foo"] == [1, 2, 3] + + def test_extend_err(self): + self.md.append("foo", 1) + libpry.raises("not iterable", self.md.extend, "foo", 2) + + def 
test_get(self): + self.md.append("foo", 1) + self.md.append("foo", 2) + assert self.md.get("foo") == [1, 2] + assert self.md.get("bar") == None + + def test_caseSensitivity(self): + self.md._helper = (utils._caseless,) + self.md["foo"] = [1] + self.md.append("FOO", 2) + assert self.md["foo"] == [1, 2] + assert self.md["FOO"] == [1, 2] + assert self.md.has_key("FoO") + + def test_dict(self): + self.md.append("foo", 1) + self.md.append("foo", 2) + self.md["bar"] = [3] + assert self.md == self.md + assert dict(self.md) == self.md + + def test_copy(self): + self.md["foo"] = [1, 2] + self.md["bar"] = [3, 4] + md2 = self.md.copy() + assert md2 == self.md + assert id(md2) != id(self.md) + + def test_clear(self): + self.md["foo"] = [1, 2] + self.md["bar"] = [3, 4] + self.md.clear() + assert not self.md.keys() + + def test_setitem(self): + libpry.raises(ValueError, self.md.__setitem__, "foo", "bar") + self.md["foo"] = ["bar"] + assert self.md["foo"] == ["bar"] + + def test_itemPairs(self): + self.md.append("foo", 1) + self.md.append("foo", 2) + self.md.append("bar", 3) + l = list(self.md.itemPairs()) + assert len(l) == 3 + assert ("foo", 1) in l + assert ("foo", 2) in l + assert ("bar", 3) in l + + +class uHeaders(libpry.AutoTree): + def setUp(self): + self.hd = utils.Headers() + + def test_read_simple(self): + data = """ + Header: one + Header2: two + \r\n + """ + data = textwrap.dedent(data) + data = data.strip() + s = cStringIO.StringIO(data) + self.hd.read(s) + assert self.hd["header"] == ["one"] + assert self.hd["header2"] == ["two"] + + def test_read_multi(self): + data = """ + Header: one + Header: two + \r\n + """ + data = textwrap.dedent(data) + data = data.strip() + s = cStringIO.StringIO(data) + self.hd.read(s) + assert self.hd["header"] == ["one", "two"] + + def test_read_continued(self): + data = """ + Header: one + \ttwo + Header2: three + \r\n + """ + data = textwrap.dedent(data) + data = data.strip() + s = cStringIO.StringIO(data) + self.hd.read(s) + assert 
self.hd["header"] == ['one\r\n two'] + + def test_dictToHeader1(self): + self.hd.append("one", "uno") + self.hd.append("two", "due") + self.hd.append("two", "tre") + expected = [ + "one: uno\r\n", + "two: due\r\n", + "two: tre\r\n", + "\r\n" + ] + out = repr(self.hd) + for i in expected: + assert out.find(i) >= 0 + + def test_dictToHeader2(self): + self.hd["one"] = ["uno"] + expected1 = "one: uno\r\n" + expected2 = "\r\n" + out = repr(self.hd) + assert out.find(expected1) >= 0 + assert out.find(expected2) >= 0 + + def test_match_re(self): + h = utils.Headers() + h.append("one", "uno") + h.append("two", "due") + h.append("two", "tre") + assert h.match_re("uno") + assert h.match_re("two: due") + assert not h.match_re("nonono") + + + +class uisStringLike(libpry.AutoTree): + def test_all(self): + assert utils.isStringLike("foo") + assert not utils.isStringLike([1, 2, 3]) + assert not utils.isStringLike((1, 2, 3)) + assert not utils.isStringLike(["1", "2", "3"]) + + +class uisSequenceLike(libpry.AutoTree): + def test_all(self): + assert utils.isSequenceLike([1, 2, 3]) + assert utils.isSequenceLike((1, 2, 3)) + assert not utils.isSequenceLike("foobar") + assert utils.isSequenceLike(["foobar", "foo"]) + x = iter([1, 2, 3]) + assert utils.isSequenceLike(x) + assert not utils.isSequenceLike(1) + + +class umake_bogus_cert(libpry.AutoTree): + def test_all(self): + d = self.tmpdir() + path = os.path.join(d, "foo", "cert") + utils.make_bogus_cert(path) + + d = open(path).read() + assert "PRIVATE KEY" in d + assert "CERTIFICATE" in d + + +tests = [ + umake_bogus_cert(), + uisBin(), + uhexdump(), + upretty_size(), + uisStringLike(), + uisSequenceLike(), + uMultiDict(), + uHeaders(), + uData(), +] diff --git a/test/tserv b/test/tserv new file mode 100755 index 000000000..5b35b72ae --- /dev/null +++ b/test/tserv @@ -0,0 +1,30 @@ +#!/usr/bin/env python +""" + A simple program for testing the test HTTP/S servers. 
+""" +from optparse import OptionParser, OptionGroup +import sslserv, serv + +if __name__ == "__main__": + parser = OptionParser( + usage = "%prog [options] output", + version="%prog 0.1", + ) + parser.add_option( + "-s", "--ssl", action="store_true", + dest="ssl", default=False + ) + options, args = parser.parse_args() + + if options.ssl: + port = 8443 + print "Running on port %s"%port + s = sslserv.make(port) + else: + port = 8080 + print "Running on port %s"%port + s = serv.make(port) + try: + s.serve_forever() + except KeyboardInterrupt: + pass diff --git a/todo b/todo new file mode 100644 index 000000000..5ba41f76e --- /dev/null +++ b/todo @@ -0,0 +1,17 @@ + +Future: + + - Strings view. + - Field parsing and editing. + - On-the-fly generation of keys, signed with a CA + - Pass-through fast-track for things that don't match filter? + - Reading contents from file + - Saving contents to file + - Shortcut for viewing in pager + - Serializing and de-serializing requests and responses. + + +Bugs: + + - In some circumstances, long URLs in list view are line-broken oddly. + - Termination sometimes hangs.