Excerpts of Varnoobius: 2009

Expanding (a + b)^n yields the summation of the 2^n terms. This program will show you the expanded formula. The example below shows (1.6 + 0.4)^3 expanded and solved:


$ ./binomial 1.6 0.4 3
(1.600000 + 0.400000)^3
 = (1.600000 + 0.400000)(1.600000 + 0.400000)(1.600000 + 0.400000)
 = (1.600000)(1.600000)(1.600000) + (1.600000)(1.600000)(0.400000) + (1.600000)(0.400000)(1.600000) + (1.600000)(0.400000)(0.400000) + (0.400000)(1.600000)(1.600000) + (0.400000)(1.600000)(0.400000) + (0.400000)(0.400000)(1.600000) + (0.400000)(0.400000)(0.400000)
 = 8.000000

Rather than use the FOIL method to compute the 2^20 terms of the expansion, it makes use of the fact that all of the terms consist of of either 4 or 2; a binary dichotomy. Instead an algorithm to convert a the value of a variable of type long to a binary representation is used.


$ time ./binomial 4.0 2.0 20 | tail -n 1
 = 3656158440062976.000000

real 0m17.482s
user 0m16.650s
sys 0m0.830s

It may be slow, but at least accurately computed (4 + 2)^20 properly using a very similar algorithm to that of a person doing algebra with pencil and paper who had never heard of the Binomial Theorem.
For solving Powers of Binomials problems, using Binomial Theorem instead would have been an approach which used less running time, of course. Or the following:


double result = pow(a + b, exp);

Here is my binomial.c file:


/*
 * Copyright (c) 2009, Sean A.O. Harney 
 *
 * Calculate (a+b)^exp
 * TODO: Breaks with exponents larger than 30, check for arch specific limit when parsing cmdline args
 * TODO: Change it to use C99 fixed width integer types instead of long.
 */

#include <stdio.h>
#include <stdlib.h>

double binomial_expansion(double a, double b, int exp);
double calcterm(unsigned long x, int width, double a, double b);

int main(int argc, char *argv[])
{
    double a, b;
    int exp;   //positive.
    char *endptr;

    if (argc < 4)
      {
   fprintf(stderr, "Usage:\t%s a b exp\n", argv[0]);
   exit(1);
      }

    a = strtod(argv[1], &endptr); // C89
    if (endptr == NULL)
      {
   perror("strtod()");
   exit(1);
      }

    b = strtod(argv[2], &endptr);
    if (endptr == NULL)
      {
   perror("strtod()");
   exit(1);
      }

    exp = strtol(argv[3], &endptr, 10); // atoi() does not error check
    if (endptr == NULL)
      {
   perror("strtol()");
   exit(1);
      }
    if (exp <= 0)
      {
   fprintf(stderr, "exp must be positive integer.\n");
   exit(1);
      }

    binomial_expansion(a, b, exp);
    exit(0);
}


// return value as computed
double binomial_expansion(double a, double b, int exp)
{
    long num_terms;  // will be 2^exp terms
    long i;
    double total = 0;

    printf("(%f + %f)^%d\n\t= ", a, b, exp);

    for (i = 0, num_terms = 2; i < exp; i++)
      {
   if (i < exp - 1)
       num_terms *= 2; //raise num_terms by one power for all iteration but last
   printf("(%f + %f)", a, b);
      }


    printf("\n\t= ");
    for (i = 0; i < num_terms; i++)
      {
   total += calcterm(i, exp, a, b);
   if (i < num_terms - 1)
       printf(" + ");
      }

    printf("\n\t= %f\n", total);

    return total;
}


// modified from a convert int to binary string representation function
double calcterm(unsigned long x, int width, double a, double b)
{
    double s[64];  // max exp size
    int i = width - 1;
    double total = 0.0;

    do
      {
   s[i--] = (x & 1) ? b : a; // b for 1, a for 0
   x >>= 1;
      }
    while (x > 0);
    while (i >= 0)
 s[i--] = a;  // fill with a (0), all must be fixed width

    for (i = 0; i < width; i++)
      {
   if (i == 0)
       total = s[0];
   else
       total *= s[i];
   printf("(%f)", s[i]);
      }

    return total;
}

Hello, as you may be aware, Google's popular blogger.com/blogspot.com blogging site allows you to easily monetize your blog by serving Adsense advertisements on it.

While a blogger has the option of not displaying their profile on the page, it is still possible to group who owns which blogs because of the Adsense publisher id!


/* 
 * 7/7/2009
 * 
 * gcc -lcurl -I/usr/include/libxml2 -lxml2 -o getblognames getblognames.c
 *
 * Generates a list of blogspot.com/blogger.com blog URLs
 * by accessing http://www.blogger.com/next-blog to get a random blog redirect.
 *
 * Based on http://cool.haxx.se/cvs.cgi/curl/docs/examples/getinmemory.c
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <curl/curl.h>
#include <curl/types.h>
#include <curl/easy.h>

#include <libxml/HTMLparser.h>





struct MemoryStruct {
    char *memory;
    size_t size;
};

static void *myrealloc(void *ptr, size_t size);

static void *myrealloc(void *ptr, size_t size)
{
    /* There might be a realloc() out there that doesn't like reallocing
       NULL pointers, so we take care of it here */
    if (ptr)
 return realloc(ptr, size);
    else
 return malloc(size);
}

static size_t
WriteMemoryCallback(void *ptr, size_t size, size_t nmemb, void *data)
{
    size_t realsize = size * nmemb;
    struct MemoryStruct *mem = (struct MemoryStruct *) data;

    mem->memory = myrealloc(mem->memory, mem->size + realsize + 1);
    if (mem->memory)
      {
   memcpy(&(mem->memory[mem->size]), ptr, realsize);
   mem->size += realsize;
   mem->memory[mem->size] = 0;
      }
    return realsize;
}

void print_ahrefs(xmlNode * a_node)
{
    xmlNode *cur_node = NULL;

    for (cur_node = a_node; cur_node; cur_node = cur_node->next)
      {
   if (cur_node->type == XML_ELEMENT_NODE)
     {
  if (!strcasecmp((char *) cur_node->name, "a"))
    {
        unsigned char *url =
     xmlGetProp(cur_node, (xmlChar *) "href");
        printf("%s\n", url);
        fflush(stdout);
        xmlFree(url);
    }
     }

   print_ahrefs(cur_node->children);
      }
}

int main(int argc, char **argv)
{
    CURL *curl;
    CURLcode res;

    struct MemoryStruct chunk;

    curl_global_init(CURL_GLOBAL_ALL);

    curl = curl_easy_init();

    curl_easy_setopt(curl, CURLOPT_URL,
       "http://www.blogger.com/next-blog");

    /* send all data to this function  */
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);

    /* we pass our 'chunk' struct to the callback function */
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &chunk);

    while (1)
      {
   chunk.memory = NULL; /* we expect realloc(NULL, size) to work */
   chunk.size = 0; /* no data at this point */

   res = curl_easy_perform(curl);
   if (res != 0)
     {
  fprintf(stderr, "%s\n", curl_easy_strerror(res));
  exit(1);
     }

   if (chunk.memory)
     {
  //assumes it is a string
  //printf("%s\n", chunk.memory);
  htmlDocPtr doc =
      htmlParseDoc((unsigned char *) chunk.memory, NULL);
  xmlNode *root_element;

  if (doc == NULL)
    {
        fprintf(stderr, "error parsing HTML\n");
        exit(1);
    }

  root_element = xmlDocGetRootElement(doc);
  print_ahrefs(root_element);
  xmlFreeDoc(doc); //no htmlFreeDoc(), this seems to work to prevent memory leak however.
  free(chunk.memory);
     }

      }
    curl_easy_cleanup(curl);
    curl_global_cleanup();

    exit(0);
}

The above code will output random blogger URLs line by line. Using a persistent HTTP connection, it will continue to retreive http://www.blogger.com/next-blog and extract out the redirection link. It requires libcurl and libxml2 to compile.

The next program is a simple shell script which will parse the output of the above program and in turn create a CSV (comma seperated variable) file consisting of blogURL, AdsensePublisherID
If the blog does not have adsense, the second column will be left blank.


#!/bin/sh
#
# cat blogsURLlist.txt | ./getadpublishers.sh > blogads.csv


while read url ; do
  echo "\"$url\" , \""`wget -qO- "$url" |grep google_ad_client| \
  head -n 1| sed "s:google_ad_client[ ]*=[ ]*\"pub-\([0-9]*\)\";:\1:"`'"'
done

Now, to put it all into action:


./getblognames | ./getadpublishers.sh > blogs.xml

This will run indefinitely as there are millions of blogs, and it does not check for duplicates!
It may be a wise idea to run the getblognames program independently to generate lists of the blog URLs beforehand, but I will simply pipe it to the shell script here.

Now it will be trivial to determine which Google Adsense publisher IDs are shared between supposedly independent blogs!

If Google wants to protect the identity of their Advertisers perhaps they should assign multiple publisher IDs to each publisher, so that they can put a unique one on each page!

I will post the results when I have some, hopefully there will be lulz to be had!

Excerpts of Varnoobius

Friday, October 2, 2009

Powers of Binomials in C

Tuesday, July 7, 2009

Grouping blogger.com blogs based on the adsense account. Does Google do enough to protect its user's privacy?

Followers

Blog Archive