Archive for category Happy Hacking

Bash: Auto-Detecting a http proxy

This bash function detects a http proxy. It tries the current http_proxy environment variable, direct connection, firefox settings and simple automatic proxy configuration (wpad).

Use it like this:

export http_proxy=$(detectProxy)

function detectProxy() {
URL=http://www.google.co.uk/
wget -q -O /dev/null $URL && echo $http_proxy && return
unset http_proxy
wget -q --no-proxy -O /dev/null $URL && return

for i in ~/.mozilla/*/*/prefs.js
do
host=$(grep -Eo ‘network.proxy.http”, “[^"]*’ $i | sed ’s/.*, “//g’)
port=$(grep -Eo ‘network.proxy.http_port”, [^)]*’ $i | sed ’s/.*, //g’)
if [ -z "$host" ] || [ -z "$port" ]; then
continue
fi
export http_proxy=http://$host:$port/
wget -q -O /dev/null $URL && echo $http_proxy && return
done
export http_proxy=http://$(wget -q wpad/wpad.dat -O – |
grep -Eo ‘return [^;]*’| sed ’s,return ,,g’|sed “s,['\"]*,,g”)
wget -q -O /dev/null $URL && echo $http_proxy && return
unset http_proxy
echo unable to detect proxy >&2
}

No Comments

Merging and sorting sorted sources

This neat little algorithm takes a number of sorted files and merges them together to a sorted file. It uses fixed block sizes (predefined 16).

It basically has a “slot” for each file, you may think a queue of the blocks in this file, and tries to work its way through to the end of all queues. So it takes from the slot with the smallest block, writes it to the output file, and refills the slot with the next queue element from that file.
It is a little more sophisticated, because instead of looking through all slots for the next smallest block, it actually juggles the slots (files) in a sorted manner.
I will definitely reuse this.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
 
#define outputfile "mergedblocks"
#ifndef DEBUG
#define DEBUG 0
#endif
 
#define ENTRYSIZE 16
 
FILE * f;
 
int comp(const char * a, const char * b) {
	int i;
	for (i = 0; i < ENTRYSIZE; i++) {
		if (a[i] != b[i])
			return a[i] - b[i];
	}
	return 0;
}
void switchentries(char * a, char * b) {
	int i; 
	char t;
	for(i=0; i<ENTRYSIZE; i++) {
		t = a[i];
		a[i] = b[i];
		b[i] = t;
	}
}
 
 
int main(int argc, char ** argv)
{
	int i;
	int j;
	int nfiles;
	int v;
 
	char ** filenames;
	FILE ** files;
	FILE * outfile;
	char ** data;
	unsigned long * nentries;
	int * slot2file;
 
	assert(argc > 1);
	filenames = argv + 1;
	nfiles = argc - 1;
 
	files = (FILE **)calloc(nfiles, sizeof(FILE*));
	assert(files != NULL);
	data = (char **) calloc(nfiles, sizeof(char*));
	assert(data != NULL);
	nentries = (unsigned long *) calloc(nfiles, sizeof(unsigned long));
	assert(nentries != NULL);
	slot2file = (int *) calloc(nfiles, sizeof(int));
	assert(slot2file != NULL);
	for (i = 0; i < nfiles; i++) {
		files[i] = fopen(filenames[i], "r");
		assert(files[i] != NULL);
		nentries[i] = 0;
		slot2file[i] = i;
		data[i] = (char*) malloc(sizeof(char) * ENTRYSIZE);
		assert(data[i] != NULL);
		memset((void*)data[i], 0, ENTRYSIZE);
 
		/* load first entry */
		for (j = 0; j < ENTRYSIZE; j++) {
			v = getc(files[i]);
			assert(v != EOF);
			data[i][j] = v;
		}
		nentries[i]++;
		/* find next fitting */
		for(j = i - 1; j >= 0 && comp(data[j], data[j + 1]) > 0; j--) {
			if(DEBUG)
			printf("switching %d <-> %d\n", j, j+1);
			switchentries(data[j+1], data[j]);
			v = slot2file[j+1];
			slot2file[j+1] = slot2file[j];
			slot2file[j] = v;
		}
	}
	outfile = fopen(outputfile, "w");
	assert(outfile != NULL);
 
	while(1) {
		if(DEBUG)
		for (i = 0; i < nfiles; i++) {
			printf("slot %3d: ", i);
			for(j = 0; j < ENTRYSIZE; j++) {
				printf("%02x", (unsigned char)data[i][j]);
			}
			j = slot2file[i];
			if (j < 0) {
				printf(" (ended)");
				j = (j + 1)*-1;
			}
			printf("\t%d->%d: '%s'", i, j, filenames[j]);
			printf("\n");
		}
		if(slot2file[0] < 0) {
			break;
		}
		/* smallest is at 0 */
		/* write out smallest */
		if (slot2file[1] >= 0 && comp(data[0], data[1]) == 0) {
			printf("duplicate found\n");
			printf("file %s, item %lu\n", filenames[slot2file[0]], nentries[slot2file[0]]);
			printf("file %s, item %lu\n", filenames[slot2file[1]], nentries[slot2file[1]]);
			for(j = 0; j < ENTRYSIZE; j++) {
				printf("%02x", (unsigned char)data[0][j]);
			}
			printf("\n");
		} else {
			if(DEBUG)
			printf("smallest writeout\n");
			for (j = 0; j < ENTRYSIZE; j++) {
				putc(data[0][j], outfile);
			}
		}
 
		if(DEBUG)
		printf("refill %s\n", filenames[slot2file[0]]);
		/* refill slot if possible */
		for (j = 0; j < ENTRYSIZE; j++) {
			v = getc(files[slot2file[0]]);
			if (v == EOF) {
				if(DEBUG)
				printf("end of file\n");
				slot2file[0] = -slot2file[0] - 1;
				break;
			}
			data[0][j] = v;
		}
		if (slot2file[0] >= 0) {
			nentries[slot2file[0]]++;
		}else{ /* push to the end */
			if(DEBUG)
			printf("moving to the end\n");
			for(j = 1; j < nfiles && slot2file[j] >= 0; j++) {
				i = j - 1;
				if(DEBUG)
				printf("switching %d <-> %d\n", i, j);
				switchentries(data[i], data[j]);
				v = slot2file[i];
				slot2file[i] = slot2file[j];
				slot2file[j] = v;
			}
 
		}
		/* put in right place (bubblesort for 1 unsorted item at 0) */
		if(DEBUG)
		printf("sorting\n");
		for(j = 1; j < nfiles && slot2file[j] >= 0;j++){
			i = j - 1;
			if(comp(data[i], data[j]) > 0) {
				if(DEBUG)
				printf("switching %d <-> %d\n", i, j);
				switchentries(data[i], data[j]);
				v = slot2file[i];
				slot2file[i] = slot2file[j];
				slot2file[j] = v;
			}else{
				break;
			}
		}
	}
 
 
	for (i = 0; i < nfiles; i++) {
		slot2file[i] = -(slot2file[i] + 1);
		fclose(files[i]);
		if (i>0)
			nentries[0] += nentries[i];
	}
	fclose(outfile);
	printf("mergesorted %d files (%lu entries), wrote to %s.\n", 
		nfiles, nentries[0], outputfile);
	/*free(data);*/
 
	return 0;
}

2 Comments

MD5 collisions lottery

Short summary: I tried to collide md5 hashes from within the 128 bit range itself. I used 12464890370 md5 hashes, but found none.
Long explanation:
md5 is a hashing algorithm that produces 128 bits. It is clear that with arbitrary long input strings, you will have two inputs that yield the same output (this is called a collision). You can also produce a hash of a hash. Seeing md5 as a black box, it might be that a collision is possible within the 128 bit input range.
Starting with 0{rest zero bits} to 255{rest zero bits}, I produced a stream of md5 hashes for each: start -> hash -> hash -> hash -> …, and stored all of them. Then I looked if any of these 12402390369 hash values (=186GB) I produced were the same (I used a form of mergesort). The answer is no, none were the same.
My personal conclusion:
If we assume that this sample of 10^10 values is representative, and there had been 1 collision, the share of collisions would be around 10^-11. This is the share of the 128 bit space I looked at, the whole space is around 10^38 big (2^128). One collision would have meant that there were 10^28 collisions.
Since I didn’t find any collisions, we can assume there are less than 10^28 collisions in the space. My personal guess is that there are around 10. It would be neat to have a md5 identity (where hash(x)=x ), but I don’t think that is possible.
A algorithmic analysis might be more successful than what I did, but it is complicated to produce collisions, and there haven’t been any constructed within space boundaries.

Another thing I noticed is that the disk speed is the very limiting factor for producing and storing hashes.

The generating code is basically a loop around the following code. And then some qsort over the output once its done.

MD5_Init(&md5_state);
MD5_Update(&md5_state, message, BUF_SIZE);
MD5_Final(message, &md5_state);
for(j = 0; j < 16; j++)
    fputc(message[j], f);

3 Comments

Tomboy – Usability patch

Got a little patch into tomboy for a menu item in the status icon menu.

No Comments

Linear interpolation

Given the rectangle xleft, xright, yleft and yright, which mark the lower and upper bounds, this interpolates a y for the input value x.

double interpolate(double yleft, double yright, double x, double xleft, double xright) {
	double deltax = xright - xleft;
	double deltay = yright - yleft;
	double k = deltay/deltax;
	return (x - xleft) * k + yleft;
}

No Comments

PdfJoin for Windows

PdfJoin screenshot

Allows you to merge and split PDF files.

Download installer pdfjoin-setup.exe

Try it out and recommend it to others.

PS: This is just a Windows distribution of an earlier post, packaging GTK+ and Ghostscript using py2exe and innosetup. GTK+ is licensed under GNU LGPL 2.1, Ghostscript is licensed under GPL 2, and the GUI under a modified BSD license.

No Comments

Jake

Did I mention Jake came out of the closet?

No Comments

Java concepts in C: classes

While to some it might seem absurd, some concepts in Java development can be used more or less straight-forward in C. Lets start with basic things such as object-oriented programming. OOP is a way of programming that does not rely on the language in use.

You have several choices in C:

  1. (most common): use a .h file with the struct and all functions related to the struct. if the struct is called foo_bar, the functions are called foo_bar_init(), foo_bar_set_baz(), etc.
  2. (the gnome way): gobject is a class concept that uses preprocessor directives. It is as effective as ugly.

I prefer the first. A funky thing you can do in C is to put the functions as function pointers in the struct:


typedef struct {
int a;
(void)*set_a(int);
} myclass;
void myclass_set_a(myclass * c, int new_a) {
c->a = new_a;
}
myclass * myclass_init() {
myclass * c = (myclass*) malloc(sizeof(myclass));
c->set_a = myclass_set_a;
return c;
}

This allows you to write

c->set_a(c, 42);

in your code, which already has a very object-oriented-language touch. It is basically the same thing python does: The first argument of a method is the object, only if you are already in the object and call self.mymethod(), python fills in the self for you.

If you want something ugly, you add a type-unsafe subclassing concept:

typedef struct {
int a;
void * additional_data;
} classA;
classA * classA_init();
typedef struct {
int b;
void * additional_data;
} classB;
classA * classB_init() {
classB * b = (classB*)malloc(sizeof(classB));
classA * a = classA_init();
a->additional_data = (void*)b;
return a;
}
classB_set_b(classA * c, int newb)  {
(classB*)(c->additional_data)->b = newb;
}

2 Comments

C: IDE of choice

I used to use gedit as C IDE, I still use vim, but today I’d like you to try eclipsecdt.
It has the benefit of highlighting the line of errors and being able to jump there, rebuilding with a keystroke (Ctrl-B) without leaving the window and providing a tree view. So far, lots of C IDEs do that. anjuta for example. The thing that made the difference for me is that eclipsecdt provides completion for structs (functions too) when you haven’t written any letter yet. For example “mystructp->”(Ctrl-Space). I had a look at anjuta and BLOCKS.
Further, the outline (functions, structs, directives in the file) is nice as well as a very good preprocessor expansion/preview. Eclipsecdt also has a sufficient method of autoformatting your C code and also respects your preprocessor code.
So if you have

#define IFDEBUG if(1)
IFDEBUG
printf("blah");

depending on your settings, printf will be intended.

Last word: Intend with tabs and indent “case:”. If you don’t, you are wrong :-).
Edit: typo.

2 Comments

Java concepts in C: logging

One will always run into the problem that one needs more detailed output from a section at some time, and less verbose output at another time. Or the whole output should be more verbose, or you want to investigate a segfault.

There are some logging libraries (log4c for example). I however use the following approach:



debug.h:

#ifdef DEBUG

#define IFDEBUG if(1)

#else

#define IFDEBUG if(0)

#endif

#ifdef SEGV

#define IFSEGV if(1)

#else

#define IFSEGV if(0)

#endif

#define debug(str)        IFDEBUG { printf("\tDEBUG[%s]: %s\n", AT, str); fflush(NULL); }

#define dump_i(str, var)  IFDEBUG { printf("\tDEBUG[%s]: %s: %i\n", AT, str, var); fflush(NULL); }

#define dump_p(str, var)  IFDEBUG { printf("\tDEBUG[%s]: %s: %p\n", AT, str, var); fflush(NULL); }

in your code:

		debug("we are now doing foo ");

		IFSEGV

			dump_p("m\n", (void*)m);

		IFDEBUG

			printf("\t\tn_par=%d; status=%d\n", get_n_par(m), status);

Example output:

	DEBUG[tests/tests.c:196]: add starting points, ...

This allows you to recompile your code with -DDEBUG -DSEGV and enable/disable verbosity levels without constant code changes.

2 Comments