diff --git a/BitCycle/bitcycle.css b/BitCycle/bitcycle.css index e212cac..af780f9 100644 --- a/BitCycle/bitcycle.css +++ b/BitCycle/bitcycle.css @@ -47,9 +47,12 @@ .green { color: #0c0; } +.hide { + display: none; +} h1 { text-align: center; - font-family: Consolas, 'Courier New' ,monospace; + font-family: Consolas, 'Courier New', monospace; } a { text-decoration: none; diff --git a/BitCycle/bitcycle.js b/BitCycle/bitcycle.js index 527252b..a4d68bf 100644 --- a/BitCycle/bitcycle.js +++ b/BitCycle/bitcycle.js @@ -10,9 +10,12 @@ const ZERO_BIT = 1; const ONE_BIT = 2; const BIT_SHAPE_RADIUS = 7; -const GRID_SQUARE_SIZE = 16; +const GRID_SQUARE_SIZE = 24; const GRID_FONT_SIZE = 14; +const DEFAULT_TICKS_PER_SECOND = 10; +const DEFAULT_FRAMES_PER_TICK = 1; + const BLUE = "#ABF"; const GREEN = "#6E6"; const TEAL = "#4A9"; @@ -177,14 +180,14 @@ function Source(x, y, inputString, ioFormat) { } else if (ioFormat === "signed") { inputString = inputString.split(",").map(intToSignedUnary).join("0"); } - this.queue = inputString.split(""); + this.queue = inputString.split("").map(value => new Bit(this.x, this.y, EAST, value)); this.open = (this.queue.length > 0); } Source.prototype.tick = function() { var outBit = null; if (this.open) { - outBit = new Bit(this.x, this.y, EAST, this.queue.shift()); + outBit = this.queue.shift(); if (this.queue.length === 0) { this.open = false; } @@ -255,10 +258,12 @@ Device.prototype.toString = function() { } // Define Program class -function Program(codeLines, inputLines, speed, ioFormat, expand) { +function Program(codeLines, inputLines, ticksPerSecond, ioFormat, expand) { var sinkNumber = 0; - this.setSpeed(speed); + const framesPerTick = DEFAULT_FRAMES_PER_TICK; // TODO: let user set this + this.setSpeed(ticksPerSecond, framesPerTick); + this.frame = 0; this.done = false; this.paused = true; @@ -344,18 +349,34 @@ function Program(codeLines, inputLines, speed, ioFormat, expand) { } } -Program.prototype.setSpeed = function(speed) { - 
this.speed = +speed || 10; +Program.prototype.setSpeed = function(ticksPerSecond, framesPerTick) { + this.ticksPerSecond = +ticksPerSecond || this.ticksPerSecond || DEFAULT_TICKS_PER_SECOND; + this.framesPerTick = +framesPerTick || this.framesPerTick || DEFAULT_FRAMES_PER_TICK; + this.speed = this.ticksPerSecond * this.framesPerTick; } Program.prototype.run = function() { this.paused = false; - this.tick(); + this.step(); if (!this.done) { this.timeout = window.setTimeout(this.run.bind(this), 1000 / this.speed); } } +Program.prototype.step = function() { + // Step one frame forward + this.frame++; + + if (this.frame === this.framesPerTick) { + // Move the program state forward one tick and display the current + // state of the playfield + this.tick(); + } else { + // Display the current state of the playfield + this.displayPlayfield(); + } +} + Program.prototype.tick = function() { if (this.done) { haltProgram(); @@ -539,7 +560,8 @@ Program.prototype.tick = function() { } // Display the current state of the playfield - displaySource(this.grid, this.activeBits); + this.frame = 0; + this.displayPlayfield(); } Program.prototype.reset = function() { @@ -578,21 +600,26 @@ Program.prototype.halt = function() { } } -function displaySource(grid, activeBits) { +Program.prototype.displayPlayfield = function() { clearCanvas(); - // Attach active bits to the devices at those coordinates - for (var b = 0; b < activeBits.length; b++) { - var bit = activeBits[b]; - grid[bit.y][bit.x].bitCode |= (bit.value ? 
ONE_BIT : ZERO_BIT); - } - // Display active bits and devices - for (var y = 0; y < grid.length; y++) { - var row = grid[y]; + // Display all zero bits on the playfield first + for (var b = 0; b < this.activeBits.length; b++) { + if (this.activeBits[b].value === 0) { + drawBitOffset(this.activeBits[b], this.frame / this.framesPerTick ); + } + } + // Then display all one bits on the playfield + for (var b = 0; b < this.activeBits.length; b++) { + if (this.activeBits[b].value === 1) { + drawBitOffset(this.activeBits[b], this.frame / this.framesPerTick ); + } + } + // Then display devices + for (var y = 0; y < this.grid.length; y++) { + var row = this.grid[y]; for (var x = 0; x < row.length; x++) { var device = row[x]; - drawBitsAt(device.bitCode, x, y); drawDeviceAt(device, x, y); - device.bitCode = 0; } } } @@ -603,17 +630,24 @@ function clearCanvas() { } } -function drawBitsAt(bitCode, x, y) { +function drawBitOffset(bit, offsetAmount) { + var x = bit.x + dx(bit.direction) * offsetAmount; + var y = bit.y + dy(bit.direction) * offsetAmount; + var bitCode = (bit.value ? 
ONE_BIT : ZERO_BIT); + drawBitsAt(bitCode, x, y); +} + +function drawBitsAt(bitCode, x, y, scale=1) { if (bitCode > 0) { var centerX = (x + 0.5) * GRID_SQUARE_SIZE; var centerY = (y + 0.5) * GRID_SQUARE_SIZE; if (bitCode === ZERO_BIT) { - drawCircle(centerX, centerY, BIT_SHAPE_RADIUS, BLUE); + drawCircle(centerX, centerY, BIT_SHAPE_RADIUS * scale, BLUE); } else if (bitCode === ONE_BIT) { - drawDiamond(centerX, centerY, BIT_SHAPE_RADIUS, GREEN); + drawDiamond(centerX, centerY, BIT_SHAPE_RADIUS * scale, GREEN); } else { // Both a zero and a one bit - drawCircle(centerX, centerY, BIT_SHAPE_RADIUS, BLUE); - drawDiamond(centerX, centerY, BIT_SHAPE_RADIUS, GREEN); + drawCircle(centerX, centerY, BIT_SHAPE_RADIUS * scale, BLUE); + drawDiamond(centerX, centerY, BIT_SHAPE_RADIUS * scale, GREEN); } } } @@ -640,6 +674,18 @@ function drawDeviceAt(device, x, y) { var textY = (y + 0.5) * GRID_SQUARE_SIZE + 0.25 * GRID_FONT_SIZE; context.fillStyle = BLACK; context.fillText(device.toString(), textX, textY); + + if (device instanceof Collector || device instanceof Source) { + for (let i = 0; i < Math.min(device.queue.length, 6); i++) { + let bit = device.queue[i]; + drawBitsAt( + (bit.value ? 
ONE_BIT : ZERO_BIT), + x - (1 / 7 + i / 7) + 0.5, + y + 1 / 7 - 0.5, + 0.25 + ); + } + } } function urlDecode(value) { @@ -777,6 +823,19 @@ function hideEditor() { executionControls.style.display = "block"; } +function toggleCheatSheet() { + var cheatSheet = document.getElementById('cheat-sheet'), + indicator = document.getElementById('cheat-sheet-indicator'); + + if (cheatSheet.classList.contains("hide")) { + cheatSheet.classList.remove("hide"); + indicator.innerText = "-"; + } else { + cheatSheet.classList.add("hide"); + indicator.innerText = "+"; + } +} + function loadProgram() { var sourceCode = document.getElementById('source'), ticksPerSecond = document.getElementById('ticks-per-second'), @@ -803,7 +862,7 @@ function loadProgram() { context.font = "bold " + GRID_FONT_SIZE + "px Courier New"; // Display the current state of the playfield - displaySource(program.grid, program.activeBits); + program.displayPlayfield(); runPause.style.display = "block"; step.style.display = "block"; @@ -853,7 +912,7 @@ function runPauseBtnClick() { if (program !== null && !program.done) { if (program.paused) { var ticksPerSecond = document.getElementById('ticks-per-second'); - program.setSpeed(ticksPerSecond.innerText); // TBD: is innerText the best way to do this? + program.setSpeed(ticksPerSecond.innerText); runPause.value = "Pause"; program.run(); } else { @@ -867,7 +926,7 @@ function stepBtnClick() { var runPause = document.getElementById('run-pause'); if (program !== null && !program.done) { program.pause(); - program.tick(); + program.step(); runPause.value = "Run"; } } diff --git a/BitCycle/index.html b/BitCycle/index.html index 832e226..13d098f 100644 --- a/BitCycle/index.html +++ b/BitCycle/index.html @@ -2,7 +2,8 @@ BitCycle interpreter @@ -50,6 +51,41 @@

+

Cheat Sheet +

+
+

+ <, ^, >, and v + change a bit's direction unconditionally +

+

+ + changes a bit's direction conditionally: + 0 turns left; 1 turns right +

+

+ / and \ (splitters) deflect first bit, + pass others straight through +

+

+ = (switch) changes form based on first bit: + if 0, becomes { and sends following bits west; + if 1, becomes } and sends following bits east +

+

+ A-U and W-Z (collectors) + store bits in a queue when closed and emit them, moving east, when open +

+

? (source) emits bits from input, moving east

+

! (sink) outputs bits

+

+ ~ (dupneg) copies a bit; + original copy turns right, inverted copy turns left +

+

+ 0 and 1 create a bit at the start of the + program, moving east +

+

@ halts program

+
diff --git a/Exceptionally/README.md b/Exceptionally/README.md index 35f4345..138c31d 100644 --- a/Exceptionally/README.md +++ b/Exceptionally/README.md @@ -62,7 +62,7 @@ The interpreter supports two flags. The `-V` or `--version` flag prints the vers > whython exceptionally.why -q program.exc -If you don't have Whython installed, you can run it at [Attempt This Online](https://ato.pxeger.com/about). Here is [an adapted version of the Exceptionally interpreter](https://staging.ato.pxeger.com/run?1=nVjNcts2EJ5edeojoHBTk7ak2IfOdOgorus6Hc-ktseq0-nQigamIAkJSTAgaElV3RfpJZdOn6Avk6fpAiTAP8lJyoMp7C7259sfgP7rn8V8Jec8fv_hy39ZlHAhkaCd4le6SjudV2fXw_PLCzRA-KD_bf8Adzo_nF-cXP8GlHUHwYP3sYcwmUxwN1_31DrN7sx6T62jLDTrp2o9Yfdm_UTzud3_Wq0TvjDrrlpPBY8kNyRPkZikkSH42mbIAqoEDXVkqeXW77U2FltzO4oQ8CyWhvKH3paEzFK-VpQ3nMWG8I32kTBhCH9qEJKEWsUPnc7p5c9XJ9fnQ41fgdZACdJ3GbF4PFOUkKapITxXhJmgRFJRKrupo36ihLgAawjtoCimEY9Z4KGTNGAsFznVkc2tjz9q4PnCRnGmfQlpZE2_0OiELDGEn7Qv1CJxrpEvsXqZO29VXhZuWc_xtaIIEs8qSRhqhKWVuVHrzBp9pf26LyH6VREWgiQlHC9uLk6HJRyq_jwUkuhuQtCyi1YeWqJ9tCoUqHJssXuWraqzxd6zbFWsLfbTcjffYPuJZRel25AIWSodDYujCK5bSOuqbmrzVyMTiK3xtoxXE2qbXPqe1aM7oMnvA5EunZXxJe-KlpQml1J5p7SkNLmU0t3TEFr1FdVZGhndUA0ZX72M10WDbUh0iVDeWy2RwcAmRPdaS-CZ5ZvWa4k8LytCdVHJz6vpEB1D3CFfUOGYkPLuytNtoFftVdvre17vcAS7e0vjApU1ERYnmbRK8_6q8jHOoUxhcNMJIOoiNkUsZXEqSRzoGoN-cxENU4qsVKHPdGfupq-XBs6s4StgnceZQS7KOHVzVgX95ci06oROkQSlacJC6gR8Ql1P8yxxMg5ZTFNoZz_fpYRgpV79EBxnieNqBkSliRCWkOmCybmDj7GLuGjTj3BhRz2CMIh8uIolWZ4JwYUzxVeCzwSJUEDimEt0R2csRmovWitl_sHoAedmF3PwUlsoNYIrEfIGcGL2IyIDMLjb34NxXAnQPJVw_KgPJey43qgeWCkaSxZntGqmcAaqAOHjI1zXvYNO8y0sniGCEkHvGc9SpBCtCXLIF5EA1MAo3Orj4TbvwBmFVB2Iz4AYQi8ANt4YhM0DN5JxxVO4XSijJQVIRzgvY0Os7W_WlK86a3-Aphitq7ofELb7lLYmqENVSIhPAdKYLtpoNu3089nk4Eo8AY8iAhG3AddJzZmQ1PxGVfcAjoEkhDGkMBB0pgrtSerAry5aEzF7cDEcMflGv1A1qgRUN1BeRLYbOYQTrW2h3PlxKzePR6GVFnGUBm4-IQKV9Cu8XXMi4EZidG5XsX5ERQGx1vGIiodHVNj4QNcjKr56RAVL1H74u9_2pFmim7rtbJnQAEa7MdhFUzirJ15rnqnnow2_g66IABtyTo3C3RSBX1lEY7l9Dgo8PnaOvdvJ3m3fPb6d7G8eijvoPJZ0RoUa3tOQEwldBucuC
Wtixh64GvVngmfNmbSDbuDaItKACwplOGEBoJnqvp0Rye4pirPojormIDOKa2fGGLvtwVZxATsYkmO3Cgq5g-MV9nXVl48LTNyswSY6Pun9PtoKygWJ6KcDsMnALvZf49vb0R7e3WJjCJmG0-Kz4d40KC-43dNFcNSTLJQpkly1AdwmGrhXoaz1iQqjVR-QpHgFut5SOJdJinSAFMo6fQtNIudQMpGSCOZEECh8kf6fY1ePcmhBxmOkDgpnbXqyP-UCbDrg9cB47j50KwnedtxUdGpZQWUm4pZ4fjkSWayvRV30LmNUDl4QwLlIWlIcoIPmFaq4Qa1K0OiSBg7Oryq_iIx6SH1DOS_PL86GPktG7pEaLQPk6AFz6ML0hS-3nO8WX1b20d9XDdoaa1m45xVeNfhqgAHzoEVWmQZ6g_yQx0CXAU1k7V6lrgoaig0jr9Mp0iowxn9nctr77n3--vDFJZA6HVAwHsfQRuOxnrjjcURYPB4Xg9ckUl030xUc3GJ2rwaguV-WfDWy7aq4gDm490r3eu8eyg3SWx0Y-UGk5rAKCZgkDFdoXfwTpTp8lWG6hA-kA_eT7b7L7WpgqlY1AYJRObdEu7-f8MRYqfev2afLLa_SeiG6BbwFvv8B). Put your Exceptionally code in the Code box, your input in the Input box, and click Execute. If you want to use a flag, put it in the Arguments box in this format: `["-q"]`. +If you don't have Whython installed, you can run it at [Attempt This Online](https://ato.pxeger.com/about). Here is [an adapted version of the Exceptionally interpreter](https://ato.pxeger.com/run?1=nVhbcxs1FB5e_cRPEAolu4ntJg_MMJuaUELKZKYkmZiUYTauR7FlW3Rv1WpjGxP-CC99YfhHPPTXcI52pb3Facs-xKujc_3ORdr89c9ysVaLOHr3_vN_RZjEUhHJO8Vbuk47nVenV8Ozi3MyIPSg_3X_gHY635-dP7_6FSibDoGH7lOPUDad0m6-7uE6zW7Neg_XYRaY9VNcT8WdWT_R-7GVf43rJF6adRfXMxmHKjYkD0lC8dAQfG0zEBOOjIY6stRS9DutTUTW3A4SJnEWKUP5Q4slgbCUL5HyWywiQ_hK-8iENIQ_NQhJwq3i-07n5OKny-dXZ0ONX4HWABn524xZPJ4hJeBpagjfImEuOVNclsqu66g_1wbTiRE60WEsrEM_aJTjpXX5VBsOeGjtvNBQBCIxhB-1YW7DPtMwl8C8zD21Ki9wHcupdZNeIUWyaF5BfKjhVJbnGteZNfpK-3VX4vELEpaSJWXsL67PT4Zl7FhsHglYeDtlZNUla4-syD5ZFwqw9lrbPbuNpdja3rPbWJmt7aeldPyA7Sd2u6jTBkcgUuVoWBwkuG7BrUu4qc1fj0wgtqDbPF6NqW1y5XtWjy735n4fiHzlrI0veQu0uDS55MrbosWlySWXbpUG07qPVGdleHT3NHh8_DFeF930QKJLhPJGarEMBjYhurFaDM_svumzFsu3lgW7zCNQ56ZCsKtK_ry6Dskx4BDESy4dE2LebXn6TSqw3Wqyvuf1Dkcg3VsZl7iqsYgoyZRVCu-13URChwKs4CAxGvKerHJRmsOfwmTnU2B3iZgRkYooVSya6LqEHnUJD1JOLFehz3R0HoqvlyYFWSMeyE-ORQb5K7HQDV1l9Fcj095TPiMKlKaJCLgziafc9fSeJU7HgYh4CiPAz6WQCVb40w_AcZE4rt6AqDQRwpIqXQq1cOgx1ei06Ee0sIOPZAIiH64jxVanUsbSmdFLGc8lC8mERVGs
yC2fi4igLNmgMv9gdE9zs8sFeKktlBrBlZB4AzhS-yFTEzC429-jXVIJ0DyVcPywD2XvuN6oHljJGikRZbxqpnAGKoXQ4yNa171DTnIREc0Jg3rhdyLOUoKI1hhjyBdTANTAKNzq4-E278AZRKoOxCdADKEXABtvDMLmgSvLuOIpXD_QaEkB0hHNy9gQa_LNmvKx-_YHZEbJpqr7nlArh9qaoA6xkEg8A0gjvmyj2bTTz-eZQyvxTOIwZBBxG3Cd1HwTkppfueoewNGRBDC6EAPJ51hoT1IH3rpkw-T83qVwLOWCfqFqVAmobqC8qWw3cginYNtCKflhK9ePR6GVFnGUBq4_IgJM-iXdrjmfkYXO7So2j6goINY6HlFx_4gKGx_oekTFF4-oEAnKw9_9tifNEn2o205XCZ_AaDcGu2QG5_vUa80zfD7Y8DvkkkmwoRbcKNxNCfiVhTxS2-egpONj59i7me7d9N3jm-n-w0Nxh5xFis-5xOE9C2KmoMvgrGZBjc3YA1fD_lzGWXMm7ZBruOrIdBJLDmU4FRNAM9V9O2dK3HESZeEtl81BZhTXzowxdduDreICdSgkx4pKDrmD4xXkuvhp5MImbdZgEx2f9X4fbQXlnIX84wF4yMAu9V_Tm5vRHt3dYmMImYbT4pPhfmhQnsdWpkvgqGdZoFKiYmwDuE00cK9CWesTDKNVH5CkaA263nA4l1lKdIAcyjp9A02iFlAyIXJMFkwyKHyZ_p9jV49yaEERRwQPCmdjerI_iyXYdMDrgfHcve9WErztuKno1LySq0xGLfb8ciSzSF-LuuRtJrgavGCAc5G0pDhAB80rVHGDWpeg8RWfODS_qvwsM-4R_O5yXp6dnw59kYzcIxwtA-LoAXPowvSFr7183y2-xuyjv8katA3VvNQzXjX2cYDB5kGLjJkGeoN8n8fAVxOeqNq9Cq8KGooHRl6nU6RVUkr_ztSs9827_Of9ZxdA6nRAwXgcQRuNx3rijschE9F4XAxek0i8bqZrOLjl_A4HoLlflvs4su2quIA5tPdK93rvDsoN0lsdGPlBhHMYQ4JNFgRrsin-y1IdvmiYr-Cj6sD9aLtvc7samKpVTYBgMOeWaOX7SZwYK_X-NXK63PIqrReiW8Bb4Psf). Put your Exceptionally code in the Code box, your input in the Input box, and click Execute. If you want to use a flag, put it in the Arguments box in this format: `["-q"]`. 
## Example programs diff --git a/Exceptionally/commands.md b/Exceptionally/commands.md index 8e09661..59f8be2 100644 --- a/Exceptionally/commands.md +++ b/Exceptionally/commands.md @@ -18,7 +18,7 @@ Command | Name | Description `]` | sliceto | Gets a slice of a string/list ending just before the given index `@` | find | Finds the first index at which an item/substring appears `#` | count | Counts the number of occurrences of an item/substring -`|` | split | Splits a string on occurrences of a substring +`\|` | split | Splits a string on occurrences of a substring `$` | join | Joins a string/list of strings on a given string `&` | pair | Wraps two values in a two-item list `~` | append | Appends a value to the right end of a list diff --git a/README.md b/README.md index be61fb1..6a9afb6 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ A 2D language that involves moving bits around a playfield: picture a cross betw A language that uses exceptions for control flow, inspired by the `?` operator from [Whython](https://www.pxeger.com/2021-09-19-hacking-on-cpython/). [Github](https://github.com/dloscutoff/Esolangs/tree/master/Exceptionally) +| [Attempt This Online](https://ato.pxeger.com/run?L=exceptionally) | [Origins](https://codegolf.stackexchange.com/a/242066/16766) ### Ouroboros @@ -40,6 +41,7 @@ Each line of code is a loop, which can be shortened or lengthened to achieve con Generate strings using regular expression syntax. 
[Github](https://github.com/dloscutoff/Esolangs/tree/master/Regenerate) +| [Attempt This Online](https://ato.pxeger.com/run?L=regenerate) | [Replit](https://replit.com/@dloscutoff/regenerate) ### Sisi diff --git a/Regenerate/README.md b/Regenerate/README.md index ca543ab..c101e2f 100644 --- a/Regenerate/README.md +++ b/Regenerate/README.md @@ -112,8 +112,10 @@ This feature is particularly useful for taking numbers as program inputs: Curly braces can evaluate simple arithmetic expressions: - > python3 regenerate.py -r 'a{3*$~1+1}' 3 - aaaaaaaaaa + > python3 regenerate.py -r 'a{3*$~1+1}' 2 + aaaaaaa + > python3 regenerate.py -r 'a{3*($~1+1)}' 2 + aaaaaaaaa Use `${...}` to match the result of an arithmetic expression as a string rather than using it as a repetition count: @@ -129,6 +131,32 @@ A backreference that starts with `#` instead of `$` gives the length of the back | Hello, world! | +---------------+ +### Short-circuiting alternation + +The `!` operator is similar to `|`, but it matches at most one of its alternatives. The difference only becomes apparent when multiple matches are requested. + + > python3 regenerate.py -a -r '$1|x{1,2}' + x + xx + > python3 regenerate.py -a -r '$1!x{1,2}' + x + xx + +Here, `|` and `!` behave identically. In both cases, the first alternative (a backreference to a nonexistent group) fails, and the alternation operator tries the second alternative instead. + + > python3 regenerate.py -a -r 'x{1,2}|y{1,2}' + x + xx + y + yy + > python3 regenerate.py -a -r 'x{1,2}!y{1,2}' + x + xx + +When the first alternative succeeds, `|` goes on to try the second alternative, but `!` stops and does not try the second alternative. (This behavior is inspired by the "cut" from Prolog, which also uses the `!` character.) + +Since they are both types of alternation, `|` and `!` have the same precedence and are right-associative: `a!b|c` parses as `a!(b|c)`, and `a|b!c` parses as `a|(b!c)`. 
+ ## Example programs [Hello, world](https://esolangs.org/wiki/Hello,_world!): diff --git a/Regenerate/regenerate.py b/Regenerate/regenerate.py index bf72296..870db73 100644 --- a/Regenerate/regenerate.py +++ b/Regenerate/regenerate.py @@ -8,16 +8,6 @@ ASCII_CHARS = list(map(chr, range(32, 127))) INFINITY = float("inf") -def range_from_to(lower_bound, upper_bound): - """Generates an inclusive range from lower to upper bound. - - An upper bound of infinity is also supported. - """ - i = int(lower_bound) - while i <= upper_bound: - yield i - i += 1 - def isbackreference(string): return re.fullmatch(r".~?\d+", string) is not None @@ -58,7 +48,7 @@ def parse_alternation(tokens): return branches if branches else subexpression def parse_concatenation(tokens): - branches = [""] + branches = ["cat"] subexpression = parse_repetition(tokens) while subexpression is not None: branches.append(subexpression) @@ -75,20 +65,23 @@ def parse_repetition(tokens): while tokens.peek() in ["*", "+", "?", "{"]: repetition_operator = tokens.pop() if repetition_operator == "*": - bounds = ["0", ""] + bounds = ("0", "") elif repetition_operator == "+": - bounds = ["1", ""] + bounds = ("1", "") elif repetition_operator == "?": - bounds = ["0", "1"] + bounds = ("0", "1") elif repetition_operator == "{": lower_bound = parse_numeric_expr(tokens) if tokens.peek() == ",": tokens.pop() upper_bound = parse_numeric_expr(tokens) + bounds = (lower_bound, upper_bound) else: - upper_bound = lower_bound - tokens.pop() - bounds = [lower_bound, upper_bound] + bounds = (lower_bound,) + next_token = tokens.pop() + if next_token != "}": + raise ValueError("missing closing curly brace, " + f"got {next_token}") expression = [bounds, expression] return expression @@ -100,12 +93,12 @@ def parse_atom(tokens): group_num = tokens.next_group_num() subexpression = parse(tokens) tokens.pop() - return ["(", group_num, subexpression] + return ["grp", group_num, subexpression] elif tokens.peek() == "${": tokens.pop() 
numeric_expression = parse_numeric_expr(tokens) tokens.pop() - return ["${", numeric_expression] + return ["expr", numeric_expression] elif tokens.peek() not in "(){}|!+*?": return tokens.pop() else: @@ -143,8 +136,9 @@ def parse_character_class(tokens): characters.add(prev_character) branches = ["|"] if not negated: - branches.extend(char for char in ASCII_CHARS if char in characters) + branches.extend(sorted(characters)) else: + # Negated character classes are ASCII-only branches.extend(char for char in ASCII_CHARS if char not in characters) return branches @@ -184,16 +178,49 @@ def parse_atomic_expr(tokens): return number elif isbackreference(next_token): return tokens.pop() + elif next_token == "(": + tokens.pop() + expression = parse_numeric_expr(tokens) + next_token = tokens.pop() + if next_token != ")": + raise ValueError(f"missing closing parenthesis, got {next_token}") + return expression elif next_token in [",", "}"]: return "" else: raise ValueError(f"expected arithmetic expression, got {next_token}") +def contains_infinite_quantifier(regex): + if type(regex) is not list: + return False + elif regex[0] in ["|", "!", "cat"]: + # Test if any of the subexpressions contain infinite quantifiers + return any(map(contains_infinite_quantifier, regex[1:])) + elif regex[0] == "grp": + # Test if the group contents contain infinite quantifiers + return contains_infinite_quantifier(regex[2]) + elif isinstance(regex[0], tuple): + # Test if the repetition is unbounded above + if len(regex[0]) == 2 and regex[0][1] == "": + # An empty upper bound represents infinity + return True + else: + # A nonempty upper bound is finite; test if the subexpression + # contains infinite quantifiers + return contains_infinite_quantifier(regex[1]) + else: + # Anything else is not a quantifier and cannot have nested + # expressions + return False + + class MatchState: def __init__(self, string="", pos=0, direc=1, offset=0, - extend_forward=True, extend_back=True, inputs=None): + 
extend_forward=True, extend_back=True, inputs=None, + rep_limit=0): self.string = str(string) + self.rep_limit = rep_limit try: self.pos = int(pos) except (TypeError, ValueError): @@ -220,7 +247,8 @@ def __str__(self): def copy(self): new_state = MatchState(self.string, self.pos, self.direc, self.offset, self.extend_back, - self.extend_forward, self.inputs) + self.extend_forward, self.inputs, + self.rep_limit) new_state.groups = self.groups.copy() return new_state @@ -254,7 +282,7 @@ def match_literal_string(string, match_state): def eval_numeric(expression, match_state): "Evaluates the expression and returns a number." - if isinstance(expression, int): + if isinstance(expression, (int, float)): result = expression elif isinstance(expression, list): operator, *operands = expression @@ -322,7 +350,7 @@ def resolve_backreference(expression, match_state): def match(regex, match_state): if regex is None: - yield match_state + yield match_state.copy() ## elif regex == "~": ## # TODO: Reverse direction ## new_state = match_state.copy() @@ -354,42 +382,41 @@ def match(regex, match_state): elif regex[0] == "|": # Alternation for subexpression in regex[1:]: - for new_state in match(subexpression, - match_state): - yield new_state + yield from match(subexpression, match_state) elif regex[0] == "!": # Like alternation, but stop trying other options as soon as # we find one that works found_match = False for subexpression in regex[1:]: - for new_state in match(subexpression, - match_state): + for new_state in match(subexpression, match_state): yield new_state found_match = True if found_match: break - elif regex[0] == "": + elif regex[0] == "cat": # Concatenation; match one at a time, recursively if not regex[1:]: # Base case: nothing left to match - yield match_state + yield match_state.copy() else: - # Recursive case: match the first item in the alternation, + # Recursive case: match the first item in the concatenation, # and then match the rest first_part = regex[1] - rest 
= [""] + regex[2:] - for first_part_match in match(first_part, - match_state): - for rest_match in match(rest, - first_part_match): - yield rest_match - elif isinstance(regex[0], list): + rest = ["cat"] + regex[2:] + for new_state in match(first_part, match_state): + yield from match(rest, new_state) + elif isinstance(regex[0], tuple): # Repetition - lower_bound, upper_bound = (eval_numeric(expr, match_state) - for expr in regex[0]) - if regex[0][1] == "": - # An empty upper bound represents infinity - upper_bound = INFINITY + bounds = [eval_numeric(expr, match_state) for expr in regex[0]] + if len(bounds) == 1: + # Constant number of repetitions + lower_bound, = upper_bound, = bounds + else: + # Distinct lower and upper bounds + lower_bound, upper_bound = bounds + if regex[0][1] == "": + # An empty upper bound represents infinity + upper_bound = INFINITY if lower_bound is None or upper_bound is None: # One of the bounds contained a backreference that failed # or a division by 0 @@ -397,32 +424,62 @@ def match(regex, match_state): if lower_bound < 0: lower_bound = 0 subexpression = regex[1] - for reps in range_from_to(lower_bound, upper_bound): - if reps == 0: - yield match_state + if lower_bound > 0: + # Match at least once + rest = [(lower_bound - 1, upper_bound - 1), subexpression] + for new_state in match(subexpression, match_state): + yield from match(rest, new_state) + else: + if upper_bound == INFINITY: + # Infinite quantifiers can continue only until the + # current repetition limit + actual_upper_bound = match_state.rep_limit else: - for first_match in match(subexpression, - match_state): - rest = [[reps - 1, reps - 1], subexpression] - for rest_match in match(rest, - first_match): - yield rest_match - elif regex[0] == "(": + # Finite quantifiers can continue until their upper bound + actual_upper_bound = upper_bound + for reps in range(actual_upper_bound + 1): + if reps == 0: + yield match_state.copy() + else: + for new_state in match(subexpression, 
match_state): + rest = [(reps - 1,), subexpression] + if upper_bound == INFINITY: + # Infinite quantifiers use up repetition limit + new_state.rep_limit -= reps + yield from match(rest, new_state) + elif regex[0] == "grp": # Capture group group_num, subexpression = regex[1:] start_index = match_state.pos - for new_state in match(subexpression, - match_state): + for new_state in match(subexpression, match_state): end_index = new_state.pos matched_string = new_state.string[start_index:end_index] new_state.groups[group_num] = matched_string yield new_state - elif regex[0] == "${": + elif regex[0] == "expr": + # Numeric expression to be matched literally value = eval_numeric(regex[1], match_state) if value is not None: new_state = match_literal_string(str(value), match_state) if new_state is not None: yield new_state + else: + raise ValueError(f"Unrecognized parse tree element: {regex[0]!r}") + +def all_matches(regex, inputs): + if contains_infinite_quantifier(regex): + rep_limit = 0 + while True: + initial_state = MatchState(inputs=inputs, rep_limit=rep_limit) + for match_result in match(regex, initial_state): + if match_result.rep_limit == 0: + # Only yield match results that used up the whole + # quota of repetitions + yield match_result + rep_limit += 1 + else: + # Without an infinite quantifier, just yield all results + yield from match(regex, MatchState(inputs=inputs)) def main(regex, inputs=None, result_limit=1, match_sep="\n", @@ -446,13 +503,12 @@ def main(regex, inputs=None, result_limit=1, match_sep="\n", print("Parse tree:") pprint.pprint(parsed_regex) print(verbose_separator) - for i, match_result in enumerate(match(parsed_regex, - MatchState(inputs=inputs))): + for i, match_result in enumerate(all_matches(parsed_regex, inputs)): + if i >= result_limit: + break if i > 0: print(match_sep, end="") print(match_result, end="") - if i + 1 >= result_limit: - break if trailing_newline: print() diff --git a/tinylisp/tinylisp.py b/tinylisp/tinylisp.py index 
53a90c3..8643d58 100644 --- a/tinylisp/tinylisp.py +++ b/tinylisp/tinylisp.py @@ -538,7 +538,7 @@ def tl_def(self, name, value): self.global_names[name] = self.tl_eval(value) return name else: - error("cannot define", tl_type(name)) + error("cannot define", self.tl_type(name)) return nil @macro