023‚c. UNIX didn't have sparse files before V7 (+ V1 patch) — blogn't

↩ `023,c. unix didn't have sparse files before V7 (+ V1 patch)` ↩

Sun, 25 Jan 2026 23:28:55 +0100

V1 unix
V4 unix
V5 unix
V6 unix
V7 unix
Notes
V1 unix patch

As we know, the core conceit of sparse files is that when you write to a file, and the start of the write is past the current file length, that space is filled with synthetic zero bytes, which (to the precision of some block size) aren't stored on disk. The definition in the excerpt below agrees with this. They are first present in V7 unix, in spite of having been described in this form since V1.

# V1 unix

Per UNIX Programmer's Manual, K. Thompson & D. M. Ritchie, November 3, 1971, format of file system, p. 3,

If block b in a file exists, it is not necessary that all blocks less than b exist. A zero block number either in the address words of the i-node or in an indirect block indicates that the corresponding block has never been allocated. Such a missing block reads as if it contained all zero words.

and a picture is worth 861 words, so, filtered for relevant files:

:login: root
root
# chdir /tmp
# cat >big.s
start:
	mov	$1, r0
	sys	write; start; 10
	mov	$1, r0
	sys	seek; 4087.; 0
	sys	write; start; 10
	sys	exit
# ed big.s
105
/4087/
        sys     seek; 4087.; 0
s/4087/65527/
w large.s
106
q
# df
723+2364
# as big.s
I
II
# a.out >big
# as large.s
I
II
# a.out >large
# df
718+2364
# ls -l
total   22
124 sxrwrw  1 root     84 Jan  1 00:00:00 a.out
123 s-rwrw  1 root   4095 Jan  1 00:00:00 big
121 s-rwrw  1 root    105 Jan  1 00:00:00 big.s
126 l-rwrw  1 root  65535 Jan  1 00:00:00 large
125 s-rwrw  1 root    106 Jan  1 00:00:00 large.s
                                          ...
 45 s-rwr-  1 root    142 Jan  1 00:00:00 utmp

i-node 41: USED DIR 0604 links=7 uid=0 size=70
  41[ 4]:    44 tmp
i-node 44: USED DIR 0604 links=2 uid=0 size=200
  44[16]:   126 large
  44[17]:   125 large.s
  44[18]:   123 big
  44[19]:   121 big.s
i-node 126: USED REG LARGE 0606 links=1 uid=0 size=65535
i.dskp: 304 0 0 0 0 0 0 0
indir0: 303 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 305 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
i-node 125: USED REG 0606 links=1 uid=0 size=106
i.dskp: 301 0 0 0 0 0 0 0
i-node 123: USED REG 0606 links=1 uid=0 size=4095
i.dskp: 300 0 0 0 0 0 0 302
i-node 121: USED REG 0606 links=1 uid=0 size=105
i.dskp: 297 0 0 0 0 0 0 0

# df
718+2364
# cat big
@       @@      @# df
712+2364
# cat large
@       @@      @# df
586+2364

i-node 41: USED DIR 0604 links=7 uid=0 size=70
  41[ 4]:    44 tmp
i-node 44: USED DIR 0604 links=2 uid=0 size=200
  44[16]:   126 large
  44[17]:   125 large.s
  44[18]:   123 big
  44[19]:   121 big.s
i-node 126: USED REG LARGE 0606 links=1 uid=0 size=65535
i_dskp: 304 0 0 0 0 0 0 0
indir0: 303 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 305 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
i-node 125: USED REG 0606 links=1 uid=0 size=106
i_dskp: 301 0 0 0 0 0 0 0
i-node 123: USED REG 0606 links=1 uid=0 size=4095
i_dskp: 300 306 307 308 309 310 311 302
i-node 121: USED REG 0606 links=1 uid=0 size=105
i_dskp: 297 0 0 0 0 0 0 0

Q.E.D.

# V4 unix

# df
/dev/rk0 542
# chdir /tmp
# cat >big.s
start:
	mov	$1, r0
	sys	write; start; 10
	mov	$1, r0
	sys	seek; 4087.; 0
	sys	write; start; 10
	sys	exit
# ed big.s
105
/4087/
	sys	seek; 4087.; 0
s/4087/65527/
w large.s
106
# df
/dev/rk0 540
# as big.s
# a.out >big
# as large.s
# a.out >large
# ls -l
total 141
-rwxrwxrwx 1 root       84 Jun 12 19:59 a.out
-rw-rw-rw- 1 root     4095 Jun 12 19:59 big
-rw-rw-rw- 1 root      105 Jun 12 19:59 big.s
-rw-rw-rw- 1 root    65535 Jun 12 19:59 large
-rw-rw-rw- 1 root      106 Jun 12 19:59 large.s
-rw-r--r-- 1 root      144 Jun 12 19:50 utmp
# df
/dev/rk0 534
# cat big large
@       @       @       @       #
# df
/dev/rk0 402

Q.E.D., with same wording in UNIX Programmer's Manual, Fourth Edition, K. Thompson & D. M. Ritchie, November, 1973, file system (V).

# V5 unix

# df
/dev/rk0 444
# chdir /tmp
# cat >big.s
start:
	mov     $1, r0
	sys     write; start; 10
	mov     $1, r0
	sys     seek; 4087.; 0
	sys     write; start; 10
	sys     exit
# ed big.s
105
/4087/
	sys	seek; 4087.; 0
s/4087/65527/
w large.s
106
# df
/dev/rk0 442
# as big.s
# a.out >big
# as large.s
# a.out >large
# ls -l
total 140
-rwxrwxrwx  1 root       84 Mar 21 12:10 a.out
-rw-rw-rw-  1 root     4095 Mar 21 12:10 big
-rw-rw-rw-  1 root      105 Mar 21 12:09 big.s
-rw-rw-rw-  1 root    65535 Mar 21 12:10 large
-rw-rw-rw-  1 root      106 Mar 21 12:09 large.s
# df
/dev/rk0 436
# cat big large
@       @       @       @       #
# df
/dev/rk0 304

Q.E.D., with unchanged wording on p. 2 of UNIX Programmer's Manual, Fifth Edition, K. Thompson & D. M. Ritchie, June, 1974, file system (V).

# V6 unix

# df
/dev/rk0 958
/dev/rk1 937
/dev/rk2 bad free count
192
# cat >big.s
start:
	mov	$1, r0
	sys	write; start; 10
	mov	$1, r0
	sys	seek; 4087.; 0
	sys	write; start; 10
	sys	exit
# ed big.s
105
/4087/
	sys	seek; 4087.; 0
s/4087/65527/
w large.s
106
# df
/dev/rk0 956
/dev/rk1 937
# as big.s
# a.out >big
# as large.s
# a.out >large
# df
/dev/rk0 950
/dev/rk1 937
# ls -l
total 142
-rwxrwxrwx  1 root       84 Oct 10 13:37 a.out
-rw-rw-rw-  1 root     4095 Oct 10 13:37 big
-rw-rw-rw-  1 root      105 Oct 10 13:36 big.s
-rw-rw-rw-  1 root    65535 Oct 10 13:37 large
-rw-rw-rw-  1 root      106 Oct 10 13:36 large.s
# cat big large
@       @       @       @       #
# df
/dev/rk0 818
/dev/rk1 937

Q.E.D., untouched in UNIX Programmer's Manual, Sixth Edition, K. Thompson & D. M. Ritchie, May, 1975, file system (V).

# V7 unix

# chdir /tmp
# cat >big.c
main() {
	long off = 4087;
	write(1, main, 10);
	lseek(1, off, 0);
	write(1, main, 10);
}
# ed big.c
73
/4087/
	lseek(1, 0, 4087);
s/4087/65527/
w large.c
74
# df
/dev/rp0 984
/dev/rp3 297416
# cc big.c
# a.out >big
# cc large.c
# a.out >large
# df
/dev/rp0 975
/dev/rp3 297416
# ls -l
total 14
-rwxrwxr-x 1 root      584 Dec 31 19:06 a.out
-rw-rw-r-- 1 root     4097 Dec 31 19:06 big
-rw-rw-r-- 1 root       73 Dec 31 19:05 big.c
-rw-rw-r-- 1 root       10 Dec 31 19:06 large
-rw-rw-r-- 1 root       74 Dec 31 19:05 large.c
# cat big large
w       <$uww   <$uww   >%uw    >%u#
# df
/dev/rp0 975
/dev/rp3 297416

which for the first time correctly behaves according to the still-intact paragraph in UNIX Programmer's Manual, Sixth Edition, K. Thompson & D. M. Ritchie, May, 1975, file system (V), with the corrected behaviour corresponding to readi() zeroing an unused I/O buffer if the address of the data block is 0.

# Notes

I noticed this when writing a parser for the V1 filesystem, because I forgot about this case. And then I noticed that when writing the parser for V4 filesystems I based it on, I'd also forgotten about this case (the integrity of the v4root.tar dump is unaffected: there are no sparse files on the installation tape rootfs). And then I realised I didn't think I'd seen these branches in V1 mget when reading it for the previous post, either.

unix file system files contain 8 slots for links to 512-byte disk blocks. If the file fits within that size, i.e. is no larger than 4KiB, it is "small", and those links point to data blocks directly. In the samples above, big.[sc] creates a 4KiB file with data in the first 8 bytes and the last 8 bytes, so the rest is sparse: we see this in rf0 dump 1, where the start of big (file 123 (I didn't renumber these, it's just fortuitous)) is contained in block 300 and the end — in block 302. The middle un-written-to blocks are 0 (not allocated).

Bigger files are deemed "large" and the 8 slots point to "indirect" blocks, which themselves contain 256 slots for disk block links. large.[sc] makes a 64KiB file in the same manner as big.?, so we observe that large (file 126) contains one direct link to block 304, which links to blocks 303 and (appropriately further on) 305. The contents of those blocks are the same as 300 and 302. (It's impossible to make a bigger (sparse or otherwise) file in V1 because all file offsets are 16 bits; this is immaterial. The maximum theoretical size of a large file is 1MiB, and later unixes allow addressing this in decreasingly hacky ways.)

To wit: a unix file system file consists of a sequence of "(read block n) or (insert 512 zero bytes)", and reading it sequentially consists of traversing that list, in order, until you've read as much data as the file is large. Small files inline that sequence for performance, large files keep it in chunks on disk (and if a chunk is missing, that's equivalent to 256× "insert 512 zero bytes"). If implemented this way, our original definition (and description from the manual) is fulfilled.

But, as observed above, this is not how unix (before V7) implements this: in the cat log, we can see that just reading the file allocates every formerly-sparse block: df loses 6 (8 − 2) and 126 (also the amount of sparse blocks between 300 and 302 in the indirect block) blocks respectively, and in the subsequent dump we observe this directly: every previously-zero block within the bounds of the file was allocated (unsparsed) and now actually links to a block full of zeroes.

To see why, we can trace the sys read (II) path through sysread → readi-node → dskr (the read routine for non-special files) → mget (noting that sys write (II) is basically the same but memcpys the other way; comments verbatim from Preliminary Release of UNIX Implementation Document, J. DeFelice, 6/20/72 except where [indicated], _-prefixed names expositional, drum is what they sometimes call the small disk containing the rootfs at this time):

struct {
	int  flgs;
	char nlks;
	char uid;
	int  size;
	int  dskp[8];
	int  ctim[2];
	int  mtim[2];
} i; // currently-open i-node
struct {
	// (more spilled process state)
	int *fofp;
	int  count;
} u;
extern r1, ret();
dskr(_ino/*, u.fofp, u.count*/) {
	r1 = _ino;
	/*i =*/ iget(/*r1*/);  // get i-node (r1) into i-node section of core

	r2 = i.size - *u.fofp; // file size […] subtract file offset
	if(r2 <= 0)
		goto *ret;
	if(r2 <= u.count)
		u.count = r2;

	/*r1 =*/ mget(/*i, u.fofp*/); // […] physical block number of block […] where offset points
	/*r5 =*/ dskrd(/*r1*/);       // read in block, r5 points to 1st word of data in buffer
	/*r1, r2, r3 =*/ sioreg(/*u.fofp, u.count, r5*/);
	_memcpy(r1, r2, r3);          // move data from buffer into working core […]

	if(u.count == 0)
		goto *ret;
	else
		goto *dskr;
}

extern r1, r2, r5, mq;
mget(/*i, u.fofp*/) /*-> r1*/ {
	mq = *u.fofp >> 8;  // divide […] by 256.
	r2 = mq;

	if(i.flgs & 010000)
		goto _large;

	if(r2 & 0xFFF0)
		goto _small2large;

	r1 = i.dskp[r2/2];
	if(r1 == 0) {          // if physical block num is zero then need a new block for file
		/*r1 =*/ alloc();  // allocate a new block
		i.dskp[r2/2] = r1; // physical block number stored in i-node
		setimod();         // [mark i-node modified, update mtim]
		clear(/*r1*/);     // zero out disk/drum block just allocated
	}
	return;

_small2large: // adding on block which changes small file to a large file
	// transfer old physical block pointers into new indirect block for the new large file
	/*r1 =*/ alloc();
	/*r5 =*/ wslot(/*r1*/);
	_memcpy(r5, i.dskp, sizeof(i.dskp));
	_memset(i.dskp, 0, sizeof(i.dskp));
	// clear rest of data buffer
	_memset(r5 + sizeof(i.dskp), 0, 512 - sizeof(i.dskp));
	dskwr(/*r1*/);

	i.dskp[0] = r1;   // put pointer to indirect block in i-node
	i.flgs |= 010000; // set large file bit […]
	setimod();
	goto *mget;

_large:
	r2 &= 0xFF << 1; // […] offset in indirect block
	int _in_indir = r2;

	r2 = mq >> 8; // divide byte number by 256.
	r2 &= 0xF;
	r1 = i.dskp[r2/2];
	if(r1 == 0) {          // if no indirect block exists
		/*r1 =*/ alloc();  // allocate a new block
		i.dskp[r2/2] = r1; // put block number of new [indirect] block in i-node
		setimod();         // [mark i-node modified, update mtim]
		clear(/*r1*/);     // [zero out disk/drum block just allocated]
	}
	/*r5 =*/ dskrd(/*r1*/);
	int _indir = r1;         // save block number of indirect block on stack
	r1 = r5[r2 + _in_indir]; // put physical block no of block in file sought in r1
	if(r1 == 0) {            // if no block exists
		/*r1 =*/ alloc();    // allocate a new block
		r5[r2 + _in_indir] = r1; // put new block number into proper location
		                         // in indirect block

		r1 = _indir;
		int _new = r5[r2 + _in_indir]; // […] block number of new block
		wslot(/*r1*/);
		dskwr(/*r1*/);       // write newly modified indirect block back out on disk
		r1 = _new;
		clear(/*r1*/);       // [zero out disk/drum block just allocated]
	}
	return;
}

where we see that reading/writing n bytes from/to file f at offset o is equivalent to

ensuring all blocks in f containing [o, o+n) are allocated, then
doing the read/write from/to the affected blocks.

This is fine for writing, and equivalent to just 2. But 1. means the interface from the manual isn't fulfilled (thus, unix doesn't have sparse files before V7) and means this is more-so an implementation detail optimisation that ensures every I/O read completes in 1-3 I/Os instead of m = blocks(o - f.size).

Unless, of course, you always seek to the same offset before reading. But that runs counter to unix's file-as-bag-of-bytes model, and imposes a certain structure to the file and makes it behave differently based on access pattern, which, again, runs counter to documentation and marketing of the time — DRAFT: The UNIX Time-Sharing System, D. M. Ritchie, mid-1971

3.1 Ordinary Files A file contains whatever information the user places there, for example symbolic or binary (object) programs. No particular structuring is expected by the system. […] A few user programs generate and expect files with more structure; […] however, the structure of files is controlled solely by the programs which use them, not by the system. 3.5 System I/O Calls […] There is no distinction between "random" and sequential I/O, nor is any logical or physical record size imposed by the system. The size of a file on the disk is determined by the location of the last piece of information written on it; no predetermination of the size of a file is necessary.

— as well as those going forward (cf. The UNIX™ System: Making Computers More Productive, 1982, Bell Laboratories, transcribed in the previous post).

Because the files afflicted by this are valid, just much bigger than they ought to be, the only way you really could notice this is by examining the filesystem directly trying to look precisely for this behaviour, as I have? or, on quiet system, check precisely the interaction of this behaviour and df? Being fixed in V7 agrees with this because it implies to me that this facility had started to be used like we'd use sparse files today — with large nominal file size/filesystem size ratios — on the huge unix ≤V6 install base, someone filled their disk, and the fix is pretty simple.

# V1 unix patch

--- /usr/sys/ux.s.bkp	1972-01-01 00:00:00.000000000 +0000
+++ /usr/sys/ux.s	1972-01-01 00:00:00.000000000 +0000
@@ -66,7 +66,7 @@
 sysflg:	.=.+1
 pptiflg:.=.+1
 ttyoch:	.=.+1
- .even
+mget0b:	.=.+1
  .=.+100.; sstack:
 buffer:	.=.+[ntty*140.]
 	.=.+[nbuf*520.]
--- /usr/sys/u5.s.bkp	1972-01-01 00:00:00.000000000 +0000
+++ /usr/sys/u5.s	1972-01-01 00:00:00.000000000 +0000
@@ -1,6 +1,7 @@
 / u5 -- unix
 
 mget:
+	mov	idev,cdev
 	mov	*u.fofp,mq / file offset in mq
 	clr	ac / later to be high sig
 	mov	$-8,lsh   / divide ac/mq by 256.
@@ -13,6 +14,10 @@
 	mov	i.dskp(r2),r1 / r1 has physical block number
 	bne	2f / if physical block num is zero then need a new block
 		   / for file
+	clr	cdev
+	movb	mget0b,r1
+	bne	2f
+	mov	idev,cdev
 	jsr	r0,alloc / allocate a new block
 	mov	r1,i.dskp(r2) / physical block number stored in i-node
 	jsr	r0,setimod / set inode modified byte (imod)
@@ -52,6 +57,10 @@
 	bic	$!16,r2
 	mov	i.dskp(r2),r1
 	bne	2f / if no indirect block exists
+	clr	cdev
+	movb	mget0b,r1
+	bne	3f
+	mov	idev,cdev
 	jsr	r0,alloc / allocate a new block
 	mov	r1,i.dskp(r2) / put block number of new block in i-node
 	jsr	r0,setimod / set i-node modified byte
@@ -65,6 +74,10 @@
 	mov	(r2),r1 / put physical block no of block in file
 	                / sought in r1
 	bne	2f / if no block exists
+	clr	cdev
+	movb	mget0b,r1
+	bne	2f
+	mov	idev,cdev
 	jsr	r0,alloc / allocate a new block
 	mov	r1,(r2) / put new block number into proper location in
 	                / indirect block
@@ -76,6 +89,7 @@
 	mov	(sp),r1 / restore block number of new block
 	jsr	r0,clear / clear new block
 2:
+3:
 	tst	(sp)+ / bump stack pointer
 	rts	r0
 
--- /usr/sys/u6.s.bkp	1972-01-01 00:00:00.000000000 +0000
+++ /usr/sys/u6.s	1972-01-01 00:00:00.000000000 +0000
@@ -83,6 +83,7 @@
 	jmp	error / see 'error' routine
 
 dskr:
+	movb	$1,mget0b
 	mov	(sp),r1 / i-number in r1
 	jsr	r0,iget / get i-node (r1) into i-node section of core
 	mov	i.size,r2 / file size in bytes in r2
@@ -210,6 +211,7 @@
 	jmp	error / ?
 
 dskw: / write routine for non-special files
+	clrb	mget0b
 	mov	(sp),r1 / get an i-node number from the stack into r1
 	jsr	r0,iget / write i-node out (if modified), read i-node 'r1'
 		        / into i-node area of core

You can also download this diff directly.

With the patch, when it encounters a 0 link and mget0b is non-zero, mget will return block mget0b on rf0. Because mget needs to return a block number (on the current device), this is the least intrusive way of implementing this, but it means mget needs to be careful to maintain the notion of the "current device" to mean "the device containing the current i-node" outside of this exceptional return, since the latter will vary when reading a file from the mounted filesystem. dskr sets mget0b to 1 — due to the unique way the V1 unix file system is laid out, block 1 on the rootfs can't usefully be anything except full of zeroes — dskw clears mget0b to get the old behaviour. The procedure for building and installing a thusly-updated kernel in a unix72 environment is outlined in post 023,a. V1 unix I/O buffer count vs. performance benchmark.

# df
718+2309
# cat big large /usr/big /usr/large
@       @@      @@      @@      @@      @@      @@      @@      @# df
718+2309

df and a direct examination confirm the blocks are unchanged by the read (and cdev is restored properly), but are not required because the cat completes significantly faster.

@@ -1,6 +1,7 @@
 extern r1, r2, r5, mq;
 mget(/*i, u.fofp*/) /*-> r1*/ {
+	cdev = idev;
 	mq = *u.fofp >> 8;  // divide […] by 256.
 	r2 = mq;
 
@@ -13,6 +14,11 @@
 
 	r1 = i.dskp[r2/2];
 	if(r1 == 0) {          // if physical block num is zero then need a new block for file
+		cdev = 0;
+		if(r1 = mget0b)
+			return;
+		cdev = idev;
+
 		/*r1 =*/ alloc();  // allocate a new block
 		i.dskp[r2/2] = r1; // physical block number stored in i-node
 		setimod();         // [mark i-node modified, update mtim]
@@ -43,6 +49,11 @@
 	r2 &= 0xF;
 	r1 = i.dskp[r2/2];
 	if(r1 == 0) {          // if no indirect block exists
+		cdev = 0;
+		if(r1 = mget0b)
+			return;
+		cdev = idev;
+
 		/*r1 =*/ alloc();  // allocate a new block
 		i.dskp[r2/2] = r1; // put block number of new [indirect] block in i-node
 		setimod();         // [mark i-node modified, update mtim]
@@ -52,6 +63,11 @@
 	int _indir = r1;         // save block number of indirect block on stack
 	r1 = r5[r2 + _in_indir]; // put physical block no of block in file sought in r1
 	if(r1 == 0) {            // if no block exists
+		cdev = 0;
+		if(r1 = mget0b)
+			return;
+		cdev = idev;
+
 		/*r1 =*/ alloc();    // allocate a new block
 		r5[r2 + _in_indir] = r1; // put new block number into proper location
 		                         // in indirect block

Nit-pick? Correction? Improvement? Annoying? Cute? Anything? Mail, post, or open!