/* overflow.c, by benzrf
 * Demonstrates the pitfalls of undefined behavior.
 * Examine the code below and guess what it should output. Then continue
 * reading for the answer(s) and an explanation, plus some bonus moralizing!
 *
 * https://gist.github.com/benzrf/13830b86b7077666d8ee438fc69e5b8e
 */

#include <limits.h>
#include <stdio.h>

void test_overflow(int n, int m) {
    if (n >= 0 && m >= 0) {
        int x = n + m;
        printf("n + m = %d, which is ", x);
        if (x >= 0) printf("nonnegative\n");
        else printf("negative\n");
    }
}

int main(void) {
    int big = INT_MAX / 2 + 1;
    /* The *actual* sum of big + big cannot fit in an int, so test_overflow is
     * going to show us something else. */
    test_overflow(big, big);
    return 0;
}

/* Output on my laptop:
 *
 * $ cc -o overflow overflow.c && ./overflow
 * n + m = -2147483648, which is negative
 *
 * $ cc -O -o overflow overflow.c && ./overflow
 * n + m = -2147483648, which is nonnegative
 *
 * The added "-O" flag requests that the compiler perform optimizations.
 *
 * The reason for this strange output is that if the true sum of a pair of
 * signed ints (as mathematical integers) falls outside the range that the
 * signed int type can represent, then actually adding them together in a C
 * program results in *undefined behavior* (see footnote 1). When an operation
 * is undefined behavior,
 * it means that taking *any action whatsoever* (see footnote 2) is a valid
 * implementation of that operation, *including* doing different things each
 * time or doing different things in different parts of the code. In
 * particular, it's a valid implementation to skip over the code following the
 * offending operation, or to alter it in arbitrary ways. Let's examine why
 * this can lead to the output above.
 *
 * Footnote 1: If I recall correctly, adding together *unsigned* numbers is
 * *not* undefined behavior: the C standard requires the result to wrap around
 * modulo 2^N, where N is the number of bits in the type, which is exactly the
 * overflow behavior you'd expect (a quick demo follows footnote 2 below).
 *
 * Footnote 2: There are actually different categories of undefined behavior,
 * with restrictions on what kinds of actions they can cause (C11's optional
 * Annex L distinguishes bounded vs. critical undefined behavior), but the
 * details of this aren't relevant to this example or my general point.
 */
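
/* To make footnote 1 concrete, here's a minimal sketch of the well-defined
 * unsigned case (this helper is my own addition, not part of the original
 * demo): the standard requires the result to wrap around modulo 2^N. */
void test_unsigned_wrap(void) {
    unsigned int u = UINT_MAX;
    /* Guaranteed to wrap around to 0; no undefined behavior is involved. */
    printf("UINT_MAX + 1 = %u\n", u + 1u);
}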

/* The same test_overflow as above, but with explanatory comments this time. */
void test_overflow2(int n, int m) {
    /* Let's examine the compiler's "thought process" while compiling this
     * function. */
    if (n >= 0 && m >= 0) {
        /* The code in this block can only execute if the check above passes.
         * Therefore, it's safe to assume that n and m are both nonnegative
         * while the following code is running (until the end of the if). */

        /* There are two possibilities when computing the sum below.
         * 1. The true sum of n and m is small enough to represent as an int,
         * so x will be set to that sum. Since n and m are both nonnegative,
         * their true sum will also be nonnegative. Thus, in this case, x will
         * be nonnegative in the rest of this code.
         * 2. The true sum of n and m is *not* small enough to represent as an
         * int. In *this* case, computing the sum results in undefined
         * behavior, so anything we do from here on is acceptable.
         *
         * In either case, proceeding on the assumption that x >= 0 will
         * produce acceptable behavior.
         */
        int x = n + m;
        printf("n + m = %d, which is ", x);

        /* We already know that we're allowed to assume x >= 0, so it's fine to
         * just run the first printf and never bother to make any checks or do
         * any branching. The assembly language for doing so is both slightly
         * shorter *and* marginally faster, so it's a pure win, and that's what
         * the compiler produces. */
        if (x >= 0) printf("nonnegative\n");
        else printf("negative\n");
    }
}
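
/* For illustration (a sketch of the *transformation*, not literal compiler
 * output), under "-O" the compiler may treat the body of test_overflow2 as if
 * it had been written like this: */
void test_overflow2_as_optimized(int n, int m) {
    if (n >= 0 && m >= 0) {
        int x = n + m;
        printf("n + m = %d, which is ", x);
        /* The x >= 0 branch has been folded away entirely, so "nonnegative"
         * is printed unconditionally. */
        printf("nonnegative\n");
    }
}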

/* The reason why we got the expected result when we didn't use "-O" is that
 * "thinking things through" in the above manner can take a lot of processing
 * power, so the compiler defaults to not bothering and just giving you a
 * working output as quickly as possible. If you want to compile a final result
 * that you will use a lot, though, it's a no-brainer to trade off a little bit
 * more compilation time right now for a little bit less running time whenever
 * you execute the compiled program in the future. */
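
/* Aside (my addition, not from the original demo): if you'd rather have this
 * kind of overflow caught than exploited by the optimizer, GCC and Clang both
 * accept "-fsanitize=undefined", which makes the overflowing addition print a
 * run-time diagnostic, and "-fwrapv", which instead *defines* signed overflow
 * to wrap two's-complement style. For example:
 *
 * $ cc -fsanitize=undefined -o overflow overflow.c && ./overflow
 *
 * should report a "signed integer overflow" diagnostic at run time. */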

/* Doing things whose behavior is undefined does not just leave you open to
 * unexpected result values and to crashes. It leaves you open to having parts
 * of your program run that logically should not have been possible to reach,
 * or having parts of your program skipped that logically should have been
 * executed. Even aside from security issues, this can result in error-handling
 * being skipped, which may cause your program to continue to run past when it
 * should have aborted, creating a debugging nightmare.
 *
 * Here's a good (albeit huge) resource for rules and recommendations on
 * dealing with C's bullshit: https://wiki.sei.cmu.edu/confluence/display/c
 * I haven't personally ever tried reading it from start to finish, but I've
 * found it useful as something to search if I'm uncertain about whether
 * something might have dangerous edge cases. They also have a big table of
 * things that have undefined behavior:
 * https://wiki.sei.cmu.edu/confluence/display/c/CC.+Undefined+Behavior
 */
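
/* Finally, a hedged sketch (my own helper, not from the original demo) of the
 * kind of fix resources like the CERT standard recommend for this particular
 * pitfall: test whether the sum is representable *before* computing it, so
 * the undefined addition never happens. */
void test_overflow_safely(int n, int m) {
    if (n >= 0 && m >= 0) {
        /* Since m >= 0, INT_MAX - m cannot itself overflow. */
        if (n <= INT_MAX - m) {
            printf("n + m = %d, which is nonnegative\n", n + m);
        } else {
            printf("n + m would overflow an int\n");
        }
    }
}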