1 : /*
2 : ** 2001 September 15
3 : **
4 : ** The author disclaims copyright to this source code. In place of
5 : ** a legal notice, here is a blessing:
6 : **
7 : ** May you do good and not evil.
8 : ** May you find forgiveness for yourself and forgive others.
9 : ** May you share freely, never taking more than you give.
10 : **
11 : *************************************************************************
12 : ** This is the implementation of the page cache subsystem or "pager".
13 : **
14 : ** The pager is used to access a database disk file. It implements
15 : ** atomic commit and rollback through the use of a journal file that
16 : ** is separate from the database file. The pager also implements file
17 : ** locking to prevent two processes from writing the same database
18 : ** file simultaneously, or one process from reading the database while
19 : ** another is writing.
20 : **
21 : ** @(#) $Id$
22 : */
23 : #ifndef SQLITE_OMIT_DISKIO
24 : #include "sqliteInt.h"
25 : #include "os.h"
26 : #include "pager.h"
27 : #include <assert.h>
28 : #include <string.h>
29 :
/*
** Tracing macros used while troubleshooting the pager.
**
** They are compiled out by default: the definitions in the #else branch
** expand to nothing, so the arguments of a PAGERTRACEn() invocation are
** never evaluated.  Change "#if 0" to "#if 1" to route the trace text
** through printf().  The digit in each macro name is the number of
** arguments it accepts (format string included).
*/
#if 0
# define sqlite3DebugPrintf          printf
# define PAGERTRACE1(X)              sqlite3DebugPrintf(X)
# define PAGERTRACE2(X,Y)            sqlite3DebugPrintf(X,Y)
# define PAGERTRACE3(X,Y,Z)          sqlite3DebugPrintf(X,Y,Z)
# define PAGERTRACE4(X,Y,Z,W)        sqlite3DebugPrintf(X,Y,Z,W)
# define PAGERTRACE5(X,Y,Z,W,V)      sqlite3DebugPrintf(X,Y,Z,W,V)
#else
# define PAGERTRACE1(X)
# define PAGERTRACE2(X,Y)
# define PAGERTRACE3(X,Y,Z)
# define PAGERTRACE4(X,Y,Z,W)
# define PAGERTRACE5(X,Y,Z,W,V)
#endif
47 :
/*
** The following two macros are used within the PAGERTRACEX() macros above
** to print out file-descriptors.
**
** PAGERID() takes a pointer to a Pager struct as its argument and yields
** that pager's "fd" member converted to int.  FILEHANDLEID() takes an
** OsFile handle as its argument and converts it to int.
**
** Every macro argument is wrapped in parentheses so that an expression
** argument (for example "p+1" or "a ? b : c") is evaluated as a whole
** before the member access or cast is applied.
*/
#define PAGERID(p) ((int)((p)->fd))
#define FILEHANDLEID(fd) ((int)(fd))
58 :
59 : /*
60 : ** The page cache as a whole is always in one of the following
61 : ** states:
62 : **
63 : ** PAGER_UNLOCK The page cache is not currently reading or
64 : ** writing the database file. There is no
65 : ** data held in memory. This is the initial
66 : ** state.
67 : **
68 : ** PAGER_SHARED The page cache is reading the database.
69 : ** Writing is not permitted. There can be
70 : ** multiple readers accessing the same database
71 : ** file at the same time.
72 : **
73 : ** PAGER_RESERVED This process has reserved the database for writing
74 : ** but has not yet made any changes. Only one process
75 : ** at a time can reserve the database. The original
76 : ** database file has not been modified so other
77 : ** processes may still be reading the on-disk
78 : ** database file.
79 : **
80 : ** PAGER_EXCLUSIVE The page cache is writing the database.
81 : ** Access is exclusive. No other processes or
82 : ** threads can be reading or writing while one
83 : ** process is writing.
84 : **
85 : ** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE
86 : ** after all dirty pages have been written to the
87 : ** database file and the file has been synced to
88 : ** disk. All that remains to do is to remove or
89 : ** truncate the journal file and the transaction
90 : ** will be committed.
91 : **
** The page cache comes up in PAGER_UNLOCK. The first time a
** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
** After all pages have been released using sqlite3PagerUnref(),
** the state transitions back to PAGER_UNLOCK. The first time
** that sqlite3PagerWrite() is called, the state transitions to
** PAGER_RESERVED. (Note that sqlite3PagerWrite() can only be
** called on an outstanding page which means that the pager must
** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** PAGER_RESERVED means that there is an open rollback journal.
** The transition to PAGER_EXCLUSIVE occurs before any changes
** are made to the database file, though writes to the rollback
** journal occur with just PAGER_RESERVED. After an sqlite3PagerRollback()
** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
106 : */
#define PAGER_UNLOCK      0   /* initial state: not reading or writing */
#define PAGER_SHARED      1   /* same as SHARED_LOCK */
#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED      5   /* dirty pages written and database synced */
112 :
/*
** Compile-time switch: when SQLITE_BUSY_RESERVED_LOCK is true, a failed
** attempt to obtain a reserved lock invokes the busy callback.  The
** default, supplied here when the build does not define the macro, is 0
** (off).  The reason for that default:
**
** Suppose thread A already holds a shared lock and wants a reserved lock,
** while thread B already holds a reserved lock and wants an exclusive
** lock.  If both threads keep retrying through their busy callbacks, it
** might be a long time before one of them gives up and allows the other
** to proceed.  If instead the thread asking for the reserved lock gives
** up at once (it never invokes its busy callback), the contention is
** resolved quickly.
*/
#if !defined(SQLITE_BUSY_RESERVED_LOCK)
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif
129 :
/*
** Round the integer value X up to the next multiple of 8 (values that
** are already a multiple of 8 are returned unchanged).  When X is an
** address expressed as an integer, the result is therefore aligned to
** an 8-byte boundary.
*/
#define FORCE_ALIGNMENT(X)   (~7 & ((X) + 7))
135 :
136 : /*
137 : ** Each in-memory image of a page begins with the following header.
138 : ** This header is only visible to this pager module. The client
139 : ** code that calls pager sees only the data that follows the header.
140 : **
141 : ** Client code should call sqlite3PagerWrite() on a page prior to making
142 : ** any modifications to that page. The first time sqlite3PagerWrite()
143 : ** is called, the original page contents are written into the rollback
144 : ** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
145 : ** the journal page has made it onto the disk surface, PgHdr.needSync
146 : ** is cleared. The modified page cannot be written back into the original
147 : ** database file until the journal pages has been synced to disk and the
148 : ** PgHdr.needSync has been cleared.
149 : **
150 : ** The PgHdr.dirty flag is set when sqlite3PagerWrite() is called and
151 : ** is cleared again when the page content is written back to the original
152 : ** database file.
153 : */
154 : typedef struct PgHdr PgHdr;
155 : struct PgHdr {
156 : Pager *pPager; /* The pager to which this page belongs */
157 : Pgno pgno; /* The page number for this page */
158 : PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
159 : PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
160 : PgHdr *pNextAll; /* A list of all pages */
161 : u8 inJournal; /* TRUE if has been written to journal */
162 : u8 dirty; /* TRUE if we need to write back changes */
163 : u8 needSync; /* Sync journal before writing this page */
164 : u8 alwaysRollback; /* Disable DontRollback() for this page */
165 : u8 needRead; /* Read content if PagerWrite() is called */
166 : short int nRef; /* Number of users of this page */
167 : PgHdr *pDirty, *pPrevDirty; /* Dirty pages */
168 : u32 notUsed; /* Buffer space */
169 : #ifdef SQLITE_CHECK_PAGES
170 : u32 pageHash;
171 : #endif
172 : /* pPager->pageSize bytes of page data follow this header */
173 : /* Pager.nExtra bytes of local data follow the page data */
174 : };
175 :
176 : /*
177 : ** For an in-memory only database, some extra information is recorded about
178 : ** each page so that changes can be rolled back. (Journal files are not
179 : ** used for in-memory databases.) The following information is added to
180 : ** the end of every EXTRA block for in-memory databases.
181 : **
182 : ** This information could have been added directly to the PgHdr structure.
183 : ** But then it would take up an extra 8 bytes of storage on every PgHdr
184 : ** even for disk-based databases. Splitting it out saves 8 bytes. This
185 : ** is only a savings of 0.8% but those percentages add up.
186 : */
187 : typedef struct PgHistory PgHistory;
188 : struct PgHistory {
189 : u8 *pOrig; /* Original page text. Restore to this on a full rollback */
190 : u8 *pStmt; /* Text as it was at the beginning of the current statement */
191 : PgHdr *pNextStmt, *pPrevStmt; /* List of pages in the statement journal */
192 : u8 inStmt; /* TRUE if in the statement subjournal */
193 : };
194 :
/*
** Macros used for invoking the codec if there is one.
**
**   P   Pager pointer whose xCodec/pCodecArg members are consulted
**   D   pointer to the page data
**   N   page number passed through to the codec
**   X   mode argument passed through to the codec
**
** CODEC1() calls the codec purely for its side effects and must be used
** as a statement.  CODEC2() is an expression that yields, as a char*,
** either the pointer returned by the codec or D itself when no codec is
** installed.  Without SQLITE_HAS_CODEC, CODEC1() expands to nothing and
** CODEC2() is simply D cast to char*.
**
** All macro arguments are parenthesized so that expression arguments
** such as "pData+offset" are cast and passed as a whole.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC1(P,D,N,X) \
    if( (P)->xCodec!=0 ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
# define CODEC2(P,D,N,X) \
    ((char*)((P)->xCodec!=0?(P)->xCodec((P)->pCodecArg,(D),(N),(X)):(D)))
#else
# define CODEC1(P,D,N,X) /* NO-OP */
# define CODEC2(P,D,N,X) ((char*)(D))
#endif
205 :
/*
** Conversions between a PgHdr pointer and the memory regions laid out
** immediately after the header:
**
**   PGHDR_TO_DATA(P)       first byte after header P (the page data)
**   DATA_TO_PGHDR(D)       the header that sits directly before data D
**   PGHDR_TO_EXTRA(G,P)    G's data pointer advanced by P->pageSize bytes
**   PGHDR_TO_HIST(P,PGR)   P's data pointer advanced by
**                          PGR->pageSize + PGR->nExtra bytes, as PgHistory*
*/
#define PGHDR_TO_DATA(P)     ((void*)((P)+1))
#define DATA_TO_PGHDR(D)     (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(G,P)  ((void*)((char*)((G)+1) + (P)->pageSize))
#define PGHDR_TO_HIST(P,PGR) \
    ((PgHistory*)((char*)((P)+1) + ((PGR)->pageSize+(PGR)->nExtra)))
215 :
216 : /*
217 : ** A open page cache is an instance of the following structure.
218 : **
219 : ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or
220 : ** or SQLITE_FULL. Once one of the first three errors occurs, it persists
221 : ** and is returned as the result of every major pager API call. The
222 : ** SQLITE_FULL return code is slightly different. It persists only until the
223 : ** next successful rollback is performed on the pager cache. Also,
224 : ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
225 : ** APIs, they may still be used successfully.
226 : */
227 : struct Pager {
228 : u8 journalOpen; /* True if journal file descriptors is valid */
229 : u8 journalStarted; /* True if header of journal is synced */
230 : u8 useJournal; /* Use a rollback journal on this file */
231 : u8 noReadlock; /* Do not bother to obtain readlocks */
232 : u8 stmtOpen; /* True if the statement subjournal is open */
233 : u8 stmtInUse; /* True we are in a statement subtransaction */
234 : u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/
235 : u8 noSync; /* Do not sync the journal if true */
236 : u8 fullSync; /* Do extra syncs of the journal for robustness */
237 : u8 full_fsync; /* Use F_FULLFSYNC when available */
238 : u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
239 : u8 tempFile; /* zFilename is a temporary file */
240 : u8 readOnly; /* True for a read-only database */
241 : u8 needSync; /* True if an fsync() is needed on the journal */
242 : u8 dirtyCache; /* True if cached pages have changed */
243 : u8 alwaysRollback; /* Disable DontRollback() for all pages */
244 : u8 memDb; /* True to inhibit all file I/O */
245 : u8 setMaster; /* True if a m-j name has been written to jrnl */
246 : u8 doNotSync; /* Boolean. While true, do not spill the cache */
247 : u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
248 : u8 changeCountDone; /* Set after incrementing the change-counter */
249 : int errCode; /* One of several kinds of errors */
250 : int dbSize; /* Number of pages in the file */
251 : int origDbSize; /* dbSize before the current change */
252 : int stmtSize; /* Size of database (in pages) at stmt_begin() */
253 : int nRec; /* Number of pages written to the journal */
254 : u32 cksumInit; /* Quasi-random value added to every checksum */
255 : int stmtNRec; /* Number of records in stmt subjournal */
256 : int nExtra; /* Add this many bytes to each in-memory page */
257 : int pageSize; /* Number of bytes in a page */
258 : int nPage; /* Total number of in-memory pages */
259 : int nMaxPage; /* High water mark of nPage */
260 : int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
261 : int mxPage; /* Maximum number of pages to hold in cache */
262 : u8 *aInJournal; /* One bit for each page in the database file */
263 : u8 *aInStmt; /* One bit for each page in the database */
264 : char *zFilename; /* Name of the database file */
265 : char *zJournal; /* Name of the journal file */
266 : char *zDirectory; /* Directory hold database and journal files */
267 : OsFile *fd, *jfd; /* File descriptors for database and journal */
268 : OsFile *stfd; /* File descriptor for the statement subjournal*/
269 : BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */
270 : PgHdr *pFirst, *pLast; /* List of free pages */
271 : PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
272 : PgHdr *pAll; /* List of all pages */
273 : PgHdr *pStmt; /* List of pages in the statement subjournal */
274 : PgHdr *pDirty; /* List of all dirty pages */
275 : i64 journalOff; /* Current byte offset in the journal file */
276 : i64 journalHdr; /* Byte offset to previous journal header */
277 : i64 stmtHdrOff; /* First journal header written this statement */
278 : i64 stmtCksum; /* cksumInit when statement was started */
279 : i64 stmtJSize; /* Size of journal at stmt_begin() */
280 : int sectorSize; /* Assumed sector size during rollback */
281 : #ifdef SQLITE_TEST
282 : int nHit, nMiss; /* Cache hits and missing */
283 : int nRead, nWrite; /* Database pages read/written */
284 : #endif
285 : void (*xDestructor)(DbPage*,int); /* Call this routine when freeing pages */
286 : void (*xReiniter)(DbPage*,int); /* Call this routine when reloading pages */
287 : #ifdef SQLITE_HAS_CODEC
288 : void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
289 : void *pCodecArg; /* First argument to xCodec() */
290 : #endif
291 : int nHash; /* Size of the pager hash table */
292 : PgHdr **aHash; /* Hash table to map page number to PgHdr */
293 : #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
294 : Pager *pNext; /* Linked list of pagers in this thread */
295 : #endif
296 : char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
297 : char dbFileVers[16]; /* Changes whenever database file changes */
298 : };
299 :
/*
** Counters used for testing purposes only.  They exist solely in builds
** that define SQLITE_TEST and they are not thread-safe.
**
** PAGER_INCR(v) increments counter v in a testing build and expands to
** nothing otherwise, so it must not be relied upon for side effects.
*/
#if defined(SQLITE_TEST)
  int sqlite3_pager_readdb_count = 0;   /* Number of full pages read from DB */
  int sqlite3_pager_writedb_count = 0;  /* Number of full pages written to DB */
  int sqlite3_pager_writej_count = 0;   /* Number of pages written to journal */
  int sqlite3_pager_pgfree_count = 0;   /* Number of cache pages freed */
# define PAGER_INCR(v)  v++
#else
# define PAGER_INCR(v)
#endif
314 :
315 :
316 :
/*
** Magic string found at the start of every journal file.  The bytes were
** obtained from /dev/random and serve only as a sanity check.
**
** Since version 2.8.0 the journal format carries additional sanity
** checking information.  If power fails while the journal is being
** written, semi-random garbage might appear in the journal file once
** power is restored, and rolling such a journal back could corrupt the
** database.  The extra information is an attempt to detect that garbage
** and ignore it.
**
** The check is a 32-bit checksum on each page of data, covering both the
** page number and the pPager->pageSize bytes of page content.  The
** checksum is seeded with a 32-bit random value that is stored in the
** journal right after the header.  The random seed matters because
** garbage at the end of a journal is likely to be data that once
** belonged to other, now deleted, files.  If that garbage came from an
** obsolete journal file its checksums might be correct; seeding every
** journal with a different random value minimizes that risk.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd7,
};
343 :
/*
** Size in bytes of one page record in the journal: the page content
** (pPager->pageSize bytes) plus 8 additional bytes.
**
** The argument is parenthesized so that any pointer expression, not
** just a plain identifier, may be passed.
*/
#define JOURNAL_PG_SZ(pPager) (((pPager)->pageSize) + 8)
349 :
/*
** The journal header size for this pager, taken from pPager->sectorSize.
** In the future, this could be set to some value read from the disk
** controller.  The important characteristic is that it is the same size
** as a disk sector.
**
** The argument is parenthesized so that any pointer expression, not
** just a plain identifier, may be passed.
*/
#define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)
356 :
/*
** MEMDB is true when the pager is serving an in-memory database.  It
** reads the memDb flag of whatever variable named "pPager" is in scope
** at the point of use.
**
** It is a macro so that, when SQLITE_OMIT_MEMORYDB is defined, MEMDB is
** the constant 0 and the compiler can optimize out code that would
** never execute.
*/
#if defined(SQLITE_OMIT_MEMORYDB)
# define MEMDB 0
#else
# define MEMDB pPager->memDb
#endif
368 :
/*
** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
** reserved for working around a windows/posix incompatibility).  The
** journal uses it as a marker: the remainder of the journal file is
** devoted to storing a master journal name and there are no more pages
** to roll back.  See the comments on writeMasterJournal() for details.
**
** The value is the page that contains PENDING_BYTE, counted from 1.
** An earlier definition, PENDING_BYTE/((x)->pageSize), lacked the "+1".
*/
#define PAGER_MJ_PGNO(x)   ((PENDING_BYTE/((x)->pageSize))+1)
378 :
/*
** The maximum legal page number is (2^31 - 1), i.e. 2147483647.
*/
#define PAGER_MAX_PGNO   0x7fffffff
383 :
/*
** Reference count tracking, available in SQLITE_DEBUG builds only.
**
** When the global pager3_refinfo_enable is non-zero, REFINFO(X) prints
** one line describing page X: its page number, the address of its data,
** its own reference count and the owning pager's total reference count.
** In non-debug builds REFINFO() expands to nothing.
*/
#ifdef SQLITE_DEBUG
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( pager3_refinfo_enable ){
      sqlite3DebugPrintf(
        "REFCNT: %4d addr=%p nRef=%-3d total=%d\n",
        p->pgno, PGHDR_TO_DATA(p), p->nRef, p->pPager->nRef
      );
      cnt++;   /* Something to set a breakpoint on */
    }
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
402 :
403 : /*
404 : ** Return true if page *pPg has already been written to the statement
405 : ** journal (or statement snapshot has been created, if *pPg is part
406 : ** of an in-memory database).
407 : */
408 327 : static int pageInStatement(PgHdr *pPg){
409 327 : Pager *pPager = pPg->pPager;
410 327 : if( MEMDB ){
411 298 : return PGHDR_TO_HIST(pPg, pPager)->inStmt;
412 : }else{
413 29 : Pgno pgno = pPg->pgno;
414 29 : u8 *a = pPager->aInStmt;
415 29 : return (a && (int)pgno<=pPager->stmtSize && (a[pgno/8] & (1<<(pgno&7))));
416 : }
417 : }
418 :
419 : /*
420 : ** Change the size of the pager hash table to N. N must be a power
421 : ** of two.
422 : */
423 108 : static void pager_resize_hash_table(Pager *pPager, int N){
424 : PgHdr **aHash, *pPg;
425 : assert( N>0 && (N&(N-1))==0 );
426 108 : aHash = sqliteMalloc( sizeof(aHash[0])*N );
427 108 : if( aHash==0 ){
428 : /* Failure to rehash is not an error. It is only a performance hit. */
429 0 : return;
430 : }
431 108 : sqliteFree(pPager->aHash);
432 108 : pPager->nHash = N;
433 108 : pPager->aHash = aHash;
434 108 : for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
435 : int h;
436 0 : if( pPg->pgno==0 ){
437 : assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
438 0 : continue;
439 : }
440 0 : h = pPg->pgno & (N-1);
441 0 : pPg->pNextHash = aHash[h];
442 0 : if( aHash[h] ){
443 0 : aHash[h]->pPrevHash = pPg;
444 : }
445 0 : aHash[h] = pPg;
446 0 : pPg->pPrevHash = 0;
447 : }
448 : }
449 :
450 : /*
451 : ** Read a 32-bit integer from the given file descriptor. Store the integer
452 : ** that is read in *pRes. Return SQLITE_OK if everything worked, or an
453 : ** error code is something goes wrong.
454 : **
455 : ** All values are stored on disk as big-endian.
456 : */
457 8 : static int read32bits(OsFile *fd, u32 *pRes){
458 : unsigned char ac[4];
459 8 : int rc = sqlite3OsRead(fd, ac, sizeof(ac));
460 8 : if( rc==SQLITE_OK ){
461 8 : *pRes = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
462 : }
463 8 : return rc;
464 : }
465 :
466 : /*
467 : ** Write a 32-bit integer into a string buffer in big-endian byte order.
468 : */
469 142 : static void put32bits(char *ac, u32 val){
470 142 : ac[0] = (val>>24) & 0xff;
471 142 : ac[1] = (val>>16) & 0xff;
472 142 : ac[2] = (val>>8) & 0xff;
473 142 : ac[3] = val & 0xff;
474 142 : }
475 :
476 : /*
477 : ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
478 : ** on success or an error code is something goes wrong.
479 : */
480 14 : static int write32bits(OsFile *fd, u32 val){
481 : char ac[4];
482 14 : put32bits(ac, val);
483 14 : return sqlite3OsWrite(fd, ac, 4);
484 : }
485 :
486 : /*
487 : ** Read a 32-bit integer at offset 'offset' from the page identified by
488 : ** page header 'p'.
489 : */
490 14 : static u32 retrieve32bits(PgHdr *p, int offset){
491 : unsigned char *ac;
492 14 : ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
493 14 : return (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
494 : }
495 :
496 :
497 : /*
498 : ** This function should be called when an error occurs within the pager
499 : ** code. The first argument is a pointer to the pager structure, the
500 : ** second the error-code about to be returned by a pager API function.
501 : ** The value returned is a copy of the second argument to this function.
502 : **
503 : ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
504 : ** the error becomes persistent. All subsequent API calls on this Pager
505 : ** will immediately return the same error code.
506 : */
507 15 : static int pager_error(Pager *pPager, int rc){
508 15 : int rc2 = rc & 0xff;
509 : assert( pPager->errCode==SQLITE_FULL || pPager->errCode==SQLITE_OK );
510 15 : if(
511 : rc2==SQLITE_FULL ||
512 : rc2==SQLITE_IOERR ||
513 : rc2==SQLITE_CORRUPT
514 : ){
515 0 : pPager->errCode = rc;
516 : }
517 15 : return rc;
518 : }
519 :
#ifdef SQLITE_CHECK_PAGES
/*
** Compute a 32-bit hash over the pPager->pageSize bytes of data that
** belong to page pPage.
*/
static u32 pager_pagehash(PgHdr *pPage){
  const unsigned char *z = (unsigned char *)PGHDR_TO_DATA(pPage);
  int n = pPage->pPager->pageSize;
  u32 h = 0;
  int i = 0;
  while( i<n ){
    h = (h+i)^z[i];
    i++;
  }
  return h;
}

/*
** The CHECK_PAGE macro takes a PgHdr* as an argument.  When
** SQLITE_CHECK_PAGES is defined and NDEBUG is not, an assert() verifies
** that a page with a recorded hash still matches that hash.  The check
** is skipped when no hash has been recorded, when the pager has an error
** code set, for in-memory databases, and for dirty pages.
*/
#define CHECK_PAGE(x) checkPage(x)
static void checkPage(PgHdr *pPg){
  Pager *pPager = pPg->pPager;
  if( pPg->pageHash && !pPager->errCode && !MEMDB && !pPg->dirty ){
    assert( pPg->pageHash==pager_pagehash(pPg) );
  }
}

#else
#define CHECK_PAGE(x)
#endif
549 :
550 : /*
551 : ** When this is called the journal file for pager pPager must be open.
552 : ** The master journal file name is read from the end of the file and
553 : ** written into memory obtained from sqliteMalloc(). *pzMaster is
554 : ** set to point at the memory and SQLITE_OK returned. The caller must
555 : ** sqliteFree() *pzMaster.
556 : **
557 : ** If no master journal file name is present *pzMaster is set to 0 and
558 : ** SQLITE_OK returned.
559 : */
560 1 : static int readMasterJournal(OsFile *pJrnl, char **pzMaster){
561 : int rc;
562 : u32 len;
563 : i64 szJ;
564 : u32 cksum;
565 : int i;
566 : unsigned char aMagic[8]; /* A buffer to hold the magic header */
567 :
568 1 : *pzMaster = 0;
569 :
570 1 : rc = sqlite3OsFileSize(pJrnl, &szJ);
571 1 : if( rc!=SQLITE_OK || szJ<16 ) return rc;
572 :
573 1 : rc = sqlite3OsSeek(pJrnl, szJ-16);
574 1 : if( rc!=SQLITE_OK ) return rc;
575 :
576 1 : rc = read32bits(pJrnl, &len);
577 1 : if( rc!=SQLITE_OK ) return rc;
578 :
579 1 : rc = read32bits(pJrnl, &cksum);
580 1 : if( rc!=SQLITE_OK ) return rc;
581 :
582 1 : rc = sqlite3OsRead(pJrnl, aMagic, 8);
583 1 : if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
584 :
585 0 : rc = sqlite3OsSeek(pJrnl, szJ-16-len);
586 0 : if( rc!=SQLITE_OK ) return rc;
587 :
588 0 : *pzMaster = (char *)sqliteMalloc(len+1);
589 0 : if( !*pzMaster ){
590 0 : return SQLITE_NOMEM;
591 : }
592 0 : rc = sqlite3OsRead(pJrnl, *pzMaster, len);
593 0 : if( rc!=SQLITE_OK ){
594 0 : sqliteFree(*pzMaster);
595 0 : *pzMaster = 0;
596 0 : return rc;
597 : }
598 :
599 : /* See if the checksum matches the master journal name */
600 0 : for(i=0; i<len; i++){
601 0 : cksum -= (*pzMaster)[i];
602 : }
603 0 : if( cksum ){
604 : /* If the checksum doesn't add up, then one or more of the disk sectors
605 : ** containing the master journal filename is corrupted. This means
606 : ** definitely roll back, so just return SQLITE_OK and report a (nul)
607 : ** master-journal filename.
608 : */
609 0 : sqliteFree(*pzMaster);
610 0 : *pzMaster = 0;
611 : }else{
612 0 : (*pzMaster)[len] = '\0';
613 : }
614 :
615 0 : return SQLITE_OK;
616 : }
617 :
618 : /*
619 : ** Seek the journal file descriptor to the next sector boundary where a
620 : ** journal header may be read or written. Pager.journalOff is updated with
621 : ** the new seek offset.
622 : **
623 : ** i.e for a sector size of 512:
624 : **
625 : ** Input Offset Output Offset
626 : ** ---------------------------------------
627 : ** 0 0
628 : ** 512 512
629 : ** 100 512
630 : ** 2000 2048
631 : **
632 : */
633 17 : static int seekJournalHdr(Pager *pPager){
634 17 : i64 offset = 0;
635 17 : i64 c = pPager->journalOff;
636 17 : if( c ){
637 1 : offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
638 : }
639 : assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
640 : assert( offset>=c );
641 : assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
642 17 : pPager->journalOff = offset;
643 17 : return sqlite3OsSeek(pPager->jfd, pPager->journalOff);
644 : }
645 :
646 : /*
647 : ** The journal file must be open when this routine is called. A journal
648 : ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
649 : ** current location.
650 : **
651 : ** The format for the journal header is as follows:
652 : ** - 8 bytes: Magic identifying journal format.
653 : ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
654 : ** - 4 bytes: Random number used for page hash.
655 : ** - 4 bytes: Initial database page count.
656 : ** - 4 bytes: Sector size used by the process that wrote this journal.
657 : **
658 : ** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space.
659 : */
660 15 : static int writeJournalHdr(Pager *pPager){
661 : char zHeader[sizeof(aJournalMagic)+16];
662 : int rc;
663 :
664 15 : if( pPager->stmtHdrOff==0 ){
665 15 : pPager->stmtHdrOff = pPager->journalOff;
666 : }
667 :
668 15 : rc = seekJournalHdr(pPager);
669 15 : if( rc ) return rc;
670 :
671 15 : pPager->journalHdr = pPager->journalOff;
672 15 : pPager->journalOff += JOURNAL_HDR_SZ(pPager);
673 :
674 : /* FIX ME:
675 : **
676 : ** Possibly for a pager not in no-sync mode, the journal magic should not
677 : ** be written until nRec is filled in as part of next syncJournal().
678 : **
679 : ** Actually maybe the whole journal header should be delayed until that
680 : ** point. Think about this.
681 : */
682 15 : memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
683 : /* The nRec Field. 0xFFFFFFFF for no-sync journals. */
684 15 : put32bits(&zHeader[sizeof(aJournalMagic)], pPager->noSync ? 0xffffffff : 0);
685 : /* The random check-hash initialiser */
686 15 : sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
687 15 : put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
688 : /* The initial database size */
689 15 : put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
690 : /* The assumed sector size for this process */
691 15 : put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
692 : IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, sizeof(zHeader)))
693 15 : rc = sqlite3OsWrite(pPager->jfd, zHeader, sizeof(zHeader));
694 :
695 : /* The journal header has been written successfully. Seek the journal
696 : ** file descriptor to the end of the journal header sector.
697 : */
698 15 : if( rc==SQLITE_OK ){
699 : IOTRACE(("JTAIL %p %lld\n", pPager, pPager->journalOff-1))
700 15 : rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff-1);
701 15 : if( rc==SQLITE_OK ){
702 15 : rc = sqlite3OsWrite(pPager->jfd, "\000", 1);
703 : }
704 : }
705 15 : return rc;
706 : }
707 :
708 : /*
709 : ** The journal file must be open when this is called. A journal header file
710 : ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
711 : ** file. See comments above function writeJournalHdr() for a description of
712 : ** the journal header format.
713 : **
714 : ** If the header is read successfully, *nRec is set to the number of
715 : ** page records following this header and *dbSize is set to the size of the
716 : ** database before the transaction began, in pages. Also, pPager->cksumInit
717 : ** is set to the value read from the journal header. SQLITE_OK is returned
718 : ** in this case.
719 : **
720 : ** If the journal header file appears to be corrupted, SQLITE_DONE is
721 : ** returned and *nRec and *dbSize are not set. If JOURNAL_HDR_SZ bytes
722 : ** cannot be read from the journal file an error code is returned.
723 : */
724 : static int readJournalHdr(
725 : Pager *pPager,
726 : i64 journalSize,
727 : u32 *pNRec,
728 : u32 *pDbSize
729 2 : ){
730 : int rc;
731 : unsigned char aMagic[8]; /* A buffer to hold the magic header */
732 :
733 2 : rc = seekJournalHdr(pPager);
734 2 : if( rc ) return rc;
735 :
736 2 : if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
737 1 : return SQLITE_DONE;
738 : }
739 :
740 1 : rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic));
741 1 : if( rc ) return rc;
742 :
743 1 : if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
744 0 : return SQLITE_DONE;
745 : }
746 :
747 1 : rc = read32bits(pPager->jfd, pNRec);
748 1 : if( rc ) return rc;
749 :
750 1 : rc = read32bits(pPager->jfd, &pPager->cksumInit);
751 1 : if( rc ) return rc;
752 :
753 1 : rc = read32bits(pPager->jfd, pDbSize);
754 1 : if( rc ) return rc;
755 :
756 : /* Update the assumed sector-size to match the value used by
757 : ** the process that created this journal. If this journal was
758 : ** created by a process other than this one, then this routine
759 : ** is being called from within pager_playback(). The local value
760 : ** of Pager.sectorSize is restored at the end of that routine.
761 : */
762 1 : rc = read32bits(pPager->jfd, (u32 *)&pPager->sectorSize);
763 1 : if( rc ) return rc;
764 :
765 1 : pPager->journalOff += JOURNAL_HDR_SZ(pPager);
766 1 : rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff);
767 1 : return rc;
768 : }
769 :
770 :
771 : /*
772 : ** Write the supplied master journal name into the journal file for pager
773 : ** pPager at the current location. The master journal name must be the last
774 : ** thing written to a journal file. If the pager is in full-sync mode, the
775 : ** journal file descriptor is advanced to the next sector boundary before
776 : ** anything is written. The format is:
777 : **
778 : ** + 4 bytes: PAGER_MJ_PGNO.
779 : ** + N bytes: length of master journal name.
780 : ** + 4 bytes: N
781 : ** + 4 bytes: Master journal name checksum.
782 : ** + 8 bytes: aJournalMagic[].
783 : **
784 : ** The master journal page checksum is the sum of the bytes in the master
785 : ** journal name.
786 : **
787 : ** If zMaster is a NULL pointer (occurs for a single database transaction),
788 : ** this call is a no-op.
789 : */
790 14 : static int writeMasterJournal(Pager *pPager, const char *zMaster){
791 : int rc;
792 : int len;
793 : int i;
794 14 : u32 cksum = 0;
795 : char zBuf[sizeof(aJournalMagic)+2*4];
796 :
797 14 : if( !zMaster || pPager->setMaster) return SQLITE_OK;
798 0 : pPager->setMaster = 1;
799 :
800 0 : len = strlen(zMaster);
801 0 : for(i=0; i<len; i++){
802 0 : cksum += zMaster[i];
803 : }
804 :
805 : /* If in full-sync mode, advance to the next disk sector before writing
806 : ** the master journal name. This is in case the previous page written to
807 : ** the journal has already been synced.
808 : */
809 0 : if( pPager->fullSync ){
810 0 : rc = seekJournalHdr(pPager);
811 0 : if( rc!=SQLITE_OK ) return rc;
812 : }
813 0 : pPager->journalOff += (len+20);
814 :
815 0 : rc = write32bits(pPager->jfd, PAGER_MJ_PGNO(pPager));
816 0 : if( rc!=SQLITE_OK ) return rc;
817 :
818 0 : rc = sqlite3OsWrite(pPager->jfd, zMaster, len);
819 0 : if( rc!=SQLITE_OK ) return rc;
820 :
821 0 : put32bits(zBuf, len);
822 0 : put32bits(&zBuf[4], cksum);
823 0 : memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
824 0 : rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic));
825 0 : pPager->needSync = !pPager->noSync;
826 0 : return rc;
827 : }
828 :
829 : /*
830 : ** Add or remove a page from the list of all pages that are in the
831 : ** statement journal.
832 : **
833 : ** The Pager keeps a separate list of pages that are currently in
834 : ** the statement journal. This helps the sqlite3PagerStmtCommit()
835 : ** routine run MUCH faster for the common case where there are many
836 : ** pages in memory but only a few are in the statement journal.
837 : */
838 0 : static void page_add_to_stmt_list(PgHdr *pPg){
839 0 : Pager *pPager = pPg->pPager;
840 0 : PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
841 : assert( MEMDB );
842 0 : if( !pHist->inStmt ){
843 : assert( pHist->pPrevStmt==0 && pHist->pNextStmt==0 );
844 0 : if( pPager->pStmt ){
845 0 : PGHDR_TO_HIST(pPager->pStmt, pPager)->pPrevStmt = pPg;
846 : }
847 0 : pHist->pNextStmt = pPager->pStmt;
848 0 : pPager->pStmt = pPg;
849 0 : pHist->inStmt = 1;
850 : }
851 0 : }
852 :
853 : /*
854 : ** Find a page in the hash table given its page number. Return
855 : ** a pointer to the page or NULL if not found.
856 : */
857 2115 : static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
858 : PgHdr *p;
859 2115 : if( pPager->aHash==0 ) return 0;
860 2007 : p = pPager->aHash[pgno & (pPager->nHash-1)];
861 4014 : while( p && p->pgno!=pgno ){
862 0 : p = p->pNextHash;
863 : }
864 2007 : return p;
865 : }
866 :
867 : /*
868 : ** Unlock the database file.
869 : */
870 883 : static void pager_unlock(Pager *pPager){
871 883 : if( !pPager->exclusiveMode ){
872 883 : if( !MEMDB ){
873 34 : sqlite3OsUnlock(pPager->fd, NO_LOCK);
874 34 : pPager->dbSize = -1;
875 : IOTRACE(("UNLOCK %p\n", pPager))
876 : }
877 883 : pPager->state = PAGER_UNLOCK;
878 883 : pPager->changeCountDone = 0;
879 : }
880 883 : }
881 :
882 : /*
883 : ** Execute a rollback if a transaction is active and unlock the
884 : ** database file. This is a no-op if the pager has already entered
885 : ** the error-state.
886 : */
887 883 : static void pagerUnlockAndRollback(Pager *p){
888 883 : if( p->errCode ) return;
889 : assert( p->state>=PAGER_RESERVED || p->journalOpen==0 );
890 883 : if( p->state>=PAGER_RESERVED ){
891 0 : sqlite3PagerRollback(p);
892 : }
893 883 : pager_unlock(p);
894 : assert( p->errCode || !p->journalOpen || (p->exclusiveMode&&!p->journalOff) );
895 : assert( p->errCode || !p->stmtOpen || p->exclusiveMode );
896 : }
897 :
898 :
899 : /*
900 : ** Clear the in-memory cache. This routine
901 : ** sets the state of the pager back to what it was when it was first
902 : ** opened. Any outstanding pages are invalidated and subsequent attempts
903 : ** to access those pages will likely result in a coredump.
904 : */
905 142 : static void pager_reset(Pager *pPager){
906 : PgHdr *pPg, *pNext;
907 142 : if( pPager->errCode ) return;
908 343 : for(pPg=pPager->pAll; pPg; pPg=pNext){
909 : IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
910 : PAGER_INCR(sqlite3_pager_pgfree_count);
911 201 : pNext = pPg->pNextAll;
912 201 : sqliteFree(pPg);
913 : }
914 142 : pPager->pStmt = 0;
915 142 : pPager->pFirst = 0;
916 142 : pPager->pFirstSynced = 0;
917 142 : pPager->pLast = 0;
918 142 : pPager->pAll = 0;
919 142 : pPager->nHash = 0;
920 142 : sqliteFree(pPager->aHash);
921 142 : pPager->nPage = 0;
922 142 : pPager->aHash = 0;
923 142 : pPager->nRef = 0;
924 : }
925 :
926 : /*
927 : ** This routine ends a transaction. A transaction is ended by either
928 : ** a COMMIT or a ROLLBACK.
929 : **
930 : ** When this routine is called, the pager has the journal file open and
931 : ** a RESERVED or EXCLUSIVE lock on the database. This routine will release
932 : ** the database lock and acquires a SHARED lock in its place if that is
933 : ** the appropriate thing to do. Release locks usually is appropriate,
934 : ** unless we are in exclusive access mode or unless this is a
935 : ** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
936 : **
937 : ** The journal file is either deleted or truncated.
938 : **
939 : ** TODO: Consider keeping the journal file open for temporary databases.
940 : ** This might give a performance improvement on windows where opening
941 : ** a file is an expensive operation.
942 : */
943 16 : static int pager_end_transaction(Pager *pPager){
944 : PgHdr *pPg;
945 16 : int rc = SQLITE_OK;
946 16 : int rc2 = SQLITE_OK;
947 : assert( !MEMDB );
948 16 : if( pPager->state<PAGER_RESERVED ){
949 1 : return SQLITE_OK;
950 : }
951 15 : sqlite3PagerStmtCommit(pPager);
952 15 : if( pPager->stmtOpen && !pPager->exclusiveMode ){
953 0 : sqlite3OsClose(&pPager->stfd);
954 0 : pPager->stmtOpen = 0;
955 : }
956 15 : if( pPager->journalOpen ){
957 15 : if( pPager->exclusiveMode
958 : && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){;
959 0 : sqlite3OsSeek(pPager->jfd, 0);
960 0 : pPager->journalOff = 0;
961 0 : pPager->journalStarted = 0;
962 : }else{
963 15 : sqlite3OsClose(&pPager->jfd);
964 15 : pPager->journalOpen = 0;
965 15 : if( rc==SQLITE_OK ){
966 15 : rc = sqlite3OsDelete(pPager->zJournal);
967 : }
968 : }
969 15 : sqliteFree( pPager->aInJournal );
970 15 : pPager->aInJournal = 0;
971 45 : for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
972 30 : pPg->inJournal = 0;
973 30 : pPg->dirty = 0;
974 30 : pPg->needSync = 0;
975 30 : pPg->alwaysRollback = 0;
976 : #ifdef SQLITE_CHECK_PAGES
977 : pPg->pageHash = pager_pagehash(pPg);
978 : #endif
979 : }
980 15 : pPager->pDirty = 0;
981 15 : pPager->dirtyCache = 0;
982 15 : pPager->nRec = 0;
983 : }else{
984 : assert( pPager->aInJournal==0 );
985 : assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
986 : }
987 :
988 15 : if( !pPager->exclusiveMode ){
989 15 : rc2 = sqlite3OsUnlock(pPager->fd, SHARED_LOCK);
990 15 : pPager->state = PAGER_SHARED;
991 0 : }else if( pPager->state==PAGER_SYNCED ){
992 0 : pPager->state = PAGER_EXCLUSIVE;
993 : }
994 15 : pPager->origDbSize = 0;
995 15 : pPager->setMaster = 0;
996 15 : pPager->needSync = 0;
997 15 : pPager->pFirstSynced = pPager->pFirst;
998 15 : pPager->dbSize = -1;
999 :
1000 15 : return (rc==SQLITE_OK?rc2:rc);
1001 : }
1002 :
1003 : /*
1004 : ** Compute and return a checksum for the page of data.
1005 : **
1006 : ** This is not a real checksum. It is really just the sum of the
1007 : ** random initial value and the page number. We experimented with
1008 : ** a checksum of the entire data, but that was found to be too slow.
1009 : **
1010 : ** Note that the page number is stored at the beginning of data and
1011 : ** the checksum is stored at the end. This is important. If journal
1012 : ** corruption occurs due to a power failure, the most likely scenario
1013 : ** is that one end or the other of the record will be changed. It is
1014 : ** much less likely that the two ends of the journal record will be
1015 : ** correct and the middle be corrupt. Thus, this "checksum" scheme,
1016 : ** though fast and simple, catches the mostly likely kind of corruption.
1017 : **
1018 : ** FIX ME: Consider adding every 200th (or so) byte of the data to the
1019 : ** checksum. That way if a single page spans 3 or more disk sectors and
1020 : ** only the middle sector is corrupt, we will still have a reasonable
1021 : ** chance of failing the checksum and thus detecting the problem.
1022 : */
1023 28 : static u32 pager_cksum(Pager *pPager, const u8 *aData){
1024 28 : u32 cksum = pPager->cksumInit;
1025 28 : int i = pPager->pageSize-200;
1026 196 : while( i>0 ){
1027 140 : cksum += aData[i];
1028 140 : i -= 200;
1029 : }
1030 28 : return cksum;
1031 : }
1032 :
1033 : /* Forward declaration */
1034 : static void makeClean(PgHdr*);
1035 :
1036 : /*
1037 : ** Read a single page from the journal file opened on file descriptor
1038 : ** jfd. Playback this one page.
1039 : **
1040 : ** If useCksum==0 it means this journal does not use checksums. Checksums
1041 : ** are not used in statement journals because statement journals do not
1042 : ** need to survive power failures.
1043 : */
1044 1 : static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
1045 : int rc;
1046 : PgHdr *pPg; /* An existing page in the cache */
1047 : Pgno pgno; /* The page number of a page in journal */
1048 : u32 cksum; /* Checksum used for sanity checking */
1049 1 : u8 *aData = (u8 *)pPager->pTmpSpace; /* Temp storage for a page */
1050 :
1051 : /* useCksum should be true for the main journal and false for
1052 : ** statement journals. Verify that this is always the case
1053 : */
1054 : assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) );
1055 : assert( aData );
1056 :
1057 1 : rc = read32bits(jfd, &pgno);
1058 1 : if( rc!=SQLITE_OK ) return rc;
1059 1 : rc = sqlite3OsRead(jfd, aData, pPager->pageSize);
1060 1 : if( rc!=SQLITE_OK ) return rc;
1061 1 : pPager->journalOff += pPager->pageSize + 4;
1062 :
1063 : /* Sanity checking on the page. This is more important that I originally
1064 : ** thought. If a power failure occurs while the journal is being written,
1065 : ** it could cause invalid data to be written into the journal. We need to
1066 : ** detect this invalid data (with high probability) and ignore it.
1067 : */
1068 1 : if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
1069 0 : return SQLITE_DONE;
1070 : }
1071 1 : if( pgno>(unsigned)pPager->dbSize ){
1072 0 : return SQLITE_OK;
1073 : }
1074 1 : if( useCksum ){
1075 1 : rc = read32bits(jfd, &cksum);
1076 1 : if( rc ) return rc;
1077 1 : pPager->journalOff += 4;
1078 1 : if( pager_cksum(pPager, aData)!=cksum ){
1079 0 : return SQLITE_DONE;
1080 : }
1081 : }
1082 :
1083 : assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
1084 :
1085 : /* If the pager is in RESERVED state, then there must be a copy of this
1086 : ** page in the pager cache. In this case just update the pager cache,
1087 : ** not the database file. The page is left marked dirty in this case.
1088 : **
1089 : ** If in EXCLUSIVE state, then we update the pager cache if it exists
1090 : ** and the main file. The page is then marked not dirty.
1091 : **
1092 : ** Ticket #1171: The statement journal might contain page content that is
1093 : ** different from the page content at the start of the transaction.
1094 : ** This occurs when a page is changed prior to the start of a statement
1095 : ** then changed again within the statement. When rolling back such a
1096 : ** statement we must not write to the original database unless we know
1097 : ** for certain that original page contents are in the main rollback
1098 : ** journal. Otherwise, if a full ROLLBACK occurs after the statement
1099 : ** rollback the full ROLLBACK will not restore the page to its original
1100 : ** content. Two conditions must be met before writing to the database
1101 : ** files. (1) the database must be locked. (2) we know that the original
1102 : ** page content is in the main journal either because the page is not in
1103 : ** cache or else it is marked as needSync==0.
1104 : */
1105 1 : pPg = pager_lookup(pPager, pgno);
1106 : assert( pPager->state>=PAGER_EXCLUSIVE || pPg!=0 );
1107 : PAGERTRACE3("PLAYBACK %d page %d\n", PAGERID(pPager), pgno);
1108 1 : if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){
1109 0 : rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize);
1110 0 : if( rc==SQLITE_OK ){
1111 0 : rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize);
1112 : }
1113 0 : if( pPg ){
1114 0 : makeClean(pPg);
1115 : }
1116 : }
1117 1 : if( pPg ){
1118 : /* No page should ever be explicitly rolled back that is in use, except
1119 : ** for page 1 which is held in use in order to keep the lock on the
1120 : ** database active. However such a page may be rolled back as a result
1121 : ** of an internal error resulting in an automatic call to
1122 : ** sqlite3PagerRollback().
1123 : */
1124 : void *pData;
1125 : /* assert( pPg->nRef==0 || pPg->pgno==1 ); */
1126 1 : pData = PGHDR_TO_DATA(pPg);
1127 1 : memcpy(pData, aData, pPager->pageSize);
1128 1 : if( pPager->xReiniter ){
1129 1 : pPager->xReiniter(pPg, pPager->pageSize);
1130 : }
1131 : #ifdef SQLITE_CHECK_PAGES
1132 : pPg->pageHash = pager_pagehash(pPg);
1133 : #endif
1134 : /* If this was page 1, then restore the value of Pager.dbFileVers.
1135 : ** Do this before any decoding. */
1136 1 : if( pgno==1 ){
1137 0 : memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
1138 : }
1139 :
1140 : /* Decode the page just read from disk */
1141 : CODEC1(pPager, pData, pPg->pgno, 3);
1142 : }
1143 1 : return rc;
1144 : }
1145 :
1146 : /*
1147 : ** Parameter zMaster is the name of a master journal file. A single journal
1148 : ** file that referred to the master journal file has just been rolled back.
1149 : ** This routine checks if it is possible to delete the master journal file,
1150 : ** and does so if it is.
1151 : **
1152 : ** The master journal file contains the names of all child journals.
1153 : ** To tell if a master journal can be deleted, check to each of the
1154 : ** children. If all children are either missing or do not refer to
1155 : ** a different master journal, then this master journal can be deleted.
1156 : */
1157 0 : static int pager_delmaster(const char *zMaster){
1158 : int rc;
1159 0 : int master_open = 0;
1160 0 : OsFile *master = 0;
1161 0 : char *zMasterJournal = 0; /* Contents of master journal file */
1162 : i64 nMasterJournal; /* Size of master journal file */
1163 :
1164 : /* Open the master journal file exclusively in case some other process
1165 : ** is running this routine also. Not that it makes too much difference.
1166 : */
1167 0 : rc = sqlite3OsOpenReadOnly(zMaster, &master);
1168 : assert( rc!=SQLITE_OK || master );
1169 0 : if( rc!=SQLITE_OK ) goto delmaster_out;
1170 0 : master_open = 1;
1171 0 : rc = sqlite3OsFileSize(master, &nMasterJournal);
1172 0 : if( rc!=SQLITE_OK ) goto delmaster_out;
1173 :
1174 0 : if( nMasterJournal>0 ){
1175 : char *zJournal;
1176 0 : char *zMasterPtr = 0;
1177 :
1178 : /* Load the entire master journal file into space obtained from
1179 : ** sqliteMalloc() and pointed to by zMasterJournal.
1180 : */
1181 0 : zMasterJournal = (char *)sqliteMalloc(nMasterJournal);
1182 0 : if( !zMasterJournal ){
1183 0 : rc = SQLITE_NOMEM;
1184 0 : goto delmaster_out;
1185 : }
1186 0 : rc = sqlite3OsRead(master, zMasterJournal, nMasterJournal);
1187 0 : if( rc!=SQLITE_OK ) goto delmaster_out;
1188 :
1189 0 : zJournal = zMasterJournal;
1190 0 : while( (zJournal-zMasterJournal)<nMasterJournal ){
1191 0 : if( sqlite3OsFileExists(zJournal) ){
1192 : /* One of the journals pointed to by the master journal exists.
1193 : ** Open it and check if it points at the master journal. If
1194 : ** so, return without deleting the master journal file.
1195 : */
1196 0 : OsFile *journal = 0;
1197 : int c;
1198 :
1199 0 : rc = sqlite3OsOpenReadOnly(zJournal, &journal);
1200 : assert( rc!=SQLITE_OK || journal );
1201 0 : if( rc!=SQLITE_OK ){
1202 0 : goto delmaster_out;
1203 : }
1204 :
1205 0 : rc = readMasterJournal(journal, &zMasterPtr);
1206 0 : sqlite3OsClose(&journal);
1207 0 : if( rc!=SQLITE_OK ){
1208 0 : goto delmaster_out;
1209 : }
1210 :
1211 0 : c = zMasterPtr!=0 && strcmp(zMasterPtr, zMaster)==0;
1212 0 : sqliteFree(zMasterPtr);
1213 0 : if( c ){
1214 : /* We have a match. Do not delete the master journal file. */
1215 0 : goto delmaster_out;
1216 : }
1217 : }
1218 0 : zJournal += (strlen(zJournal)+1);
1219 : }
1220 : }
1221 :
1222 0 : rc = sqlite3OsDelete(zMaster);
1223 :
1224 0 : delmaster_out:
1225 0 : if( zMasterJournal ){
1226 0 : sqliteFree(zMasterJournal);
1227 : }
1228 0 : if( master_open ){
1229 0 : sqlite3OsClose(&master);
1230 : }
1231 0 : return rc;
1232 : }
1233 :
1234 :
1235 : static void pager_truncate_cache(Pager *pPager);
1236 :
1237 : /*
1238 : ** Truncate the main file of the given pager to the number of pages
1239 : ** indicated. Also truncate the cached representation of the file.
1240 : */
1241 1 : static int pager_truncate(Pager *pPager, int nPage){
1242 1 : int rc = SQLITE_OK;
1243 1 : if( pPager->state>=PAGER_EXCLUSIVE ){
1244 0 : rc = sqlite3OsTruncate(pPager->fd, pPager->pageSize*(i64)nPage);
1245 : }
1246 1 : if( rc==SQLITE_OK ){
1247 1 : pPager->dbSize = nPage;
1248 1 : pager_truncate_cache(pPager);
1249 : }
1250 1 : return rc;
1251 : }
1252 :
1253 : /*
1254 : ** Playback the journal and thus restore the database file to
1255 : ** the state it was in before we started making changes.
1256 : **
1257 : ** The journal file format is as follows:
1258 : **
1259 : ** (1) 8 byte prefix. A copy of aJournalMagic[].
1260 : ** (2) 4 byte big-endian integer which is the number of valid page records
1261 : ** in the journal. If this value is 0xffffffff, then compute the
1262 : ** number of page records from the journal size.
1263 : ** (3) 4 byte big-endian integer which is the initial value for the
1264 : ** sanity checksum.
1265 : ** (4) 4 byte integer which is the number of pages to truncate the
1266 : ** database to during a rollback.
1267 : ** (5) 4 byte integer which is the number of bytes in the master journal
1268 : ** name. The value may be zero (indicate that there is no master
1269 : ** journal.)
1270 : ** (6) N bytes of the master journal name. The name will be nul-terminated
1271 : ** and might be shorter than the value read from (5). If the first byte
1272 : ** of the name is \000 then there is no master journal. The master
1273 : ** journal name is stored in UTF-8.
1274 : ** (7) Zero or more pages instances, each as follows:
1275 : ** + 4 byte page number.
1276 : ** + pPager->pageSize bytes of data.
1277 : ** + 4 byte checksum
1278 : **
1279 : ** When we speak of the journal header, we mean the first 6 items above.
1280 : ** Each entry in the journal is an instance of the 7th item.
1281 : **
1282 : ** Call the value from the second bullet "nRec". nRec is the number of
1283 : ** valid page entries in the journal. In most cases, you can compute the
1284 : ** value of nRec from the size of the journal file. But if a power
1285 : ** failure occurred while the journal was being written, it could be the
1286 : ** case that the size of the journal file had already been increased but
1287 : ** the extra entries had not yet made it safely to disk. In such a case,
1288 : ** the value of nRec computed from the file size would be too large. For
1289 : ** that reason, we always use the nRec value in the header.
1290 : **
1291 : ** If the nRec value is 0xffffffff it means that nRec should be computed
1292 : ** from the file size. This value is used when the user selects the
1293 : ** no-sync option for the journal. A power failure could lead to corruption
1294 : ** in this case. But for things like temporary table (which will be
1295 : ** deleted when the power is restored) we don't care.
1296 : **
1297 : ** If the file opened as the journal file is not a well-formed
1298 : ** journal file then all pages up to the first corrupted page are rolled
1299 : ** back (or no pages if the journal header is corrupted). The journal file
1300 : ** is then deleted and SQLITE_OK returned, just as if no corruption had
1301 : ** been encountered.
1302 : **
1303 : ** If an I/O or malloc() error occurs, the journal-file is not deleted
1304 : ** and an error code is returned.
1305 : */
1306 1 : static int pager_playback(Pager *pPager, int isHot){
1307 : i64 szJ; /* Size of the journal file in bytes */
1308 : u32 nRec; /* Number of Records in the journal */
1309 : int i; /* Loop counter */
1310 1 : Pgno mxPg = 0; /* Size of the original file in pages */
1311 : int rc; /* Result code of a subroutine */
1312 1 : char *zMaster = 0; /* Name of master journal file if any */
1313 :
1314 : /* Figure out how many records are in the journal. Abort early if
1315 : ** the journal is empty.
1316 : */
1317 : assert( pPager->journalOpen );
1318 1 : rc = sqlite3OsFileSize(pPager->jfd, &szJ);
1319 1 : if( rc!=SQLITE_OK || szJ==0 ){
1320 : goto end_playback;
1321 : }
1322 :
1323 : /* Read the master journal name from the journal, if it is present.
1324 : ** If a master journal file name is specified, but the file is not
1325 : ** present on disk, then the journal is not hot and does not need to be
1326 : ** played back.
1327 : */
1328 1 : rc = readMasterJournal(pPager->jfd, &zMaster);
1329 : assert( rc!=SQLITE_DONE );
1330 1 : if( rc!=SQLITE_OK || (zMaster && !sqlite3OsFileExists(zMaster)) ){
1331 0 : sqliteFree(zMaster);
1332 0 : zMaster = 0;
1333 0 : if( rc==SQLITE_DONE ) rc = SQLITE_OK;
1334 0 : goto end_playback;
1335 : }
1336 1 : sqlite3OsSeek(pPager->jfd, 0);
1337 1 : pPager->journalOff = 0;
1338 :
1339 : /* This loop terminates either when the readJournalHdr() call returns
1340 : ** SQLITE_DONE or an IO error occurs. */
1341 : while( 1 ){
1342 :
1343 : /* Read the next journal header from the journal file. If there are
1344 : ** not enough bytes left in the journal file for a complete header, or
1345 : ** it is corrupted, then a process must of failed while writing it.
1346 : ** This indicates nothing more needs to be rolled back.
1347 : */
1348 2 : rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
1349 2 : if( rc!=SQLITE_OK ){
1350 1 : if( rc==SQLITE_DONE ){
1351 1 : rc = SQLITE_OK;
1352 : }
1353 1 : goto end_playback;
1354 : }
1355 :
1356 : /* If nRec is 0xffffffff, then this journal was created by a process
1357 : ** working in no-sync mode. This means that the rest of the journal
1358 : ** file consists of pages, there are no more journal headers. Compute
1359 : ** the value of nRec based on this assumption.
1360 : */
1361 1 : if( nRec==0xffffffff ){
1362 : assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
1363 0 : nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
1364 : }
1365 :
1366 : /* If nRec is 0 and this rollback is of a transaction created by this
1367 : ** process. In this case the rest of the journal file consists of
1368 : ** journalled copies of pages that need to be read back into the cache.
1369 : */
1370 1 : if( nRec==0 && !isHot ){
1371 1 : nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager);
1372 : }
1373 :
1374 : /* If this is the first header read from the journal, truncate the
1375 : ** database file back to it's original size.
1376 : */
1377 1 : if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
1378 1 : rc = pager_truncate(pPager, mxPg);
1379 1 : if( rc!=SQLITE_OK ){
1380 0 : goto end_playback;
1381 : }
1382 : }
1383 :
1384 : /* Copy original pages out of the journal and back into the database file.
1385 : */
1386 2 : for(i=0; i<nRec; i++){
1387 1 : rc = pager_playback_one_page(pPager, pPager->jfd, 1);
1388 1 : if( rc!=SQLITE_OK ){
1389 0 : if( rc==SQLITE_DONE ){
1390 0 : rc = SQLITE_OK;
1391 0 : pPager->journalOff = szJ;
1392 0 : break;
1393 : }else{
1394 0 : goto end_playback;
1395 : }
1396 : }
1397 : }
1398 1 : }
1399 : /*NOTREACHED*/
1400 : assert( 0 );
1401 :
1402 1 : end_playback:
1403 1 : if( rc==SQLITE_OK ){
1404 1 : rc = pager_end_transaction(pPager);
1405 : }
1406 1 : if( zMaster ){
1407 : /* If there was a master journal and this routine will return success,
1408 : ** see if it is possible to delete the master journal.
1409 : */
1410 0 : if( rc==SQLITE_OK ){
1411 0 : rc = pager_delmaster(zMaster);
1412 : }
1413 0 : sqliteFree(zMaster);
1414 : }
1415 :
1416 : /* The Pager.sectorSize variable may have been updated while rolling
1417 : ** back a journal created by a process with a different sector size
1418 : ** value. Reset it to the correct value for this process.
1419 : */
1420 1 : pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
1421 1 : return rc;
1422 : }
1423 :
1424 : /*
1425 : ** Playback the statement journal.
1426 : **
1427 : ** This is similar to playing back the transaction journal but with
1428 : ** a few extra twists.
1429 : **
1430 : ** (1) The number of pages in the database file at the start of
1431 : ** the statement is stored in pPager->stmtSize, not in the
1432 : ** journal file itself.
1433 : **
1434 : ** (2) In addition to playing back the statement journal, also
1435 : ** playback all pages of the transaction journal beginning
1436 : ** at offset pPager->stmtJSize.
1437 : */
1438 0 : static int pager_stmt_playback(Pager *pPager){
1439 : i64 szJ; /* Size of the full journal */
1440 : i64 hdrOff;
1441 : int nRec; /* Number of Records */
1442 : int i; /* Loop counter */
1443 : int rc;
1444 :
1445 0 : szJ = pPager->journalOff;
1446 : #ifndef NDEBUG
1447 : {
1448 : i64 os_szJ;
1449 : rc = sqlite3OsFileSize(pPager->jfd, &os_szJ);
1450 : if( rc!=SQLITE_OK ) return rc;
1451 : assert( szJ==os_szJ );
1452 : }
1453 : #endif
1454 :
1455 : /* Set hdrOff to be the offset just after the end of the last journal
1456 : ** page written before the first journal-header for this statement
1457 : ** transaction was written, or the end of the file if no journal
1458 : ** header was written.
1459 : */
1460 0 : hdrOff = pPager->stmtHdrOff;
1461 : assert( pPager->fullSync || !hdrOff );
1462 0 : if( !hdrOff ){
1463 0 : hdrOff = szJ;
1464 : }
1465 :
1466 : /* Truncate the database back to its original size.
1467 : */
1468 0 : rc = pager_truncate(pPager, pPager->stmtSize);
1469 : assert( pPager->state>=PAGER_SHARED );
1470 :
1471 : /* Figure out how many records are in the statement journal.
1472 : */
1473 : assert( pPager->stmtInUse && pPager->journalOpen );
1474 0 : sqlite3OsSeek(pPager->stfd, 0);
1475 0 : nRec = pPager->stmtNRec;
1476 :
1477 : /* Copy original pages out of the statement journal and back into the
1478 : ** database file. Note that the statement journal omits checksums from
1479 : ** each record since power-failure recovery is not important to statement
1480 : ** journals.
1481 : */
1482 0 : for(i=nRec-1; i>=0; i--){
1483 0 : rc = pager_playback_one_page(pPager, pPager->stfd, 0);
1484 : assert( rc!=SQLITE_DONE );
1485 0 : if( rc!=SQLITE_OK ) goto end_stmt_playback;
1486 : }
1487 :
1488 : /* Now roll some pages back from the transaction journal. Pager.stmtJSize
1489 : ** was the size of the journal file when this statement was started, so
1490 : ** everything after that needs to be rolled back, either into the
1491 : ** database, the memory cache, or both.
1492 : **
1493 : ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
1494 : ** of the first journal header written during this statement transaction.
1495 : */
1496 0 : rc = sqlite3OsSeek(pPager->jfd, pPager->stmtJSize);
1497 0 : if( rc!=SQLITE_OK ){
1498 0 : goto end_stmt_playback;
1499 : }
1500 0 : pPager->journalOff = pPager->stmtJSize;
1501 0 : pPager->cksumInit = pPager->stmtCksum;
1502 0 : while( pPager->journalOff < hdrOff ){
1503 0 : rc = pager_playback_one_page(pPager, pPager->jfd, 1);
1504 : assert( rc!=SQLITE_DONE );
1505 0 : if( rc!=SQLITE_OK ) goto end_stmt_playback;
1506 : }
1507 :
1508 0 : while( pPager->journalOff < szJ ){
1509 : u32 nJRec; /* Number of Journal Records */
1510 : u32 dummy;
1511 0 : rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
1512 0 : if( rc!=SQLITE_OK ){
1513 : assert( rc!=SQLITE_DONE );
1514 0 : goto end_stmt_playback;
1515 : }
1516 0 : if( nJRec==0 ){
1517 0 : nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
1518 : }
1519 0 : for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
1520 0 : rc = pager_playback_one_page(pPager, pPager->jfd, 1);
1521 : assert( rc!=SQLITE_DONE );
1522 0 : if( rc!=SQLITE_OK ) goto end_stmt_playback;
1523 : }
1524 : }
1525 :
1526 0 : pPager->journalOff = szJ;
1527 :
1528 0 : end_stmt_playback:
1529 0 : if( rc==SQLITE_OK) {
1530 0 : pPager->journalOff = szJ;
1531 : /* pager_reload_cache(pPager); */
1532 : }
1533 0 : return rc;
1534 : }
1535 :
1536 : /*
1537 : ** Change the maximum number of in-memory pages that are allowed.
1538 : */
1539 238 : void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
1540 238 : if( mxPage>10 ){
1541 238 : pPager->mxPage = mxPage;
1542 : }else{
1543 0 : pPager->mxPage = 10;
1544 : }
1545 238 : }
1546 :
1547 : /*
1548 : ** Adjust the robustness of the database to damage due to OS crashes
1549 : ** or power failures by changing the number of syncs()s when writing
1550 : ** the rollback journal. There are three levels:
1551 : **
1552 : ** OFF sqlite3OsSync() is never called. This is the default
1553 : ** for temporary and transient files.
1554 : **
1555 : ** NORMAL The journal is synced once before writes begin on the
1556 : ** database. This is normally adequate protection, but
1557 : ** it is theoretically possible, though very unlikely,
1558 : ** that an inopertune power failure could leave the journal
1559 : ** in a state which would cause damage to the database
1560 : ** when it is rolled back.
1561 : **
1562 : ** FULL The journal is synced twice before writes begin on the
1563 : ** database (with some additional information - the nRec field
1564 : ** of the journal header - being written in between the two
1565 : ** syncs). If we assume that writing a
1566 : ** single disk sector is atomic, then this mode provides
1567 : ** assurance that the journal will not be corrupted to the
1568 : ** point of causing damage to the database during rollback.
1569 : **
1570 : ** Numeric values associated with these states are OFF==1, NORMAL=2,
1571 : ** and FULL=3.
1572 : */
1573 : #ifndef SQLITE_OMIT_PAGER_PRAGMAS
1574 0 : void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int full_fsync){
1575 0 : pPager->noSync = level==1 || pPager->tempFile;
1576 0 : pPager->fullSync = level==3 && !pPager->tempFile;
1577 0 : pPager->full_fsync = full_fsync;
1578 0 : if( pPager->noSync ) pPager->needSync = 0;
1579 0 : }
1580 : #endif
1581 :
1582 : /*
1583 : ** The following global variable is incremented whenever the library
1584 : ** attempts to open a temporary file. This information is used for
1585 : ** testing and analysis only.
1586 : */
1587 : #ifdef SQLITE_TEST
1588 : int sqlite3_opentemp_count = 0;
1589 : #endif
1590 :
1591 : /*
1592 : ** Open a temporary file.
1593 : **
1594 : ** Write the file descriptor into *fd. Return SQLITE_OK on success or some
1595 : ** other error code if we fail.
1596 : **
1597 : ** The OS will automatically delete the temporary file when it is
1598 : ** closed.
1599 : */
1600 0 : static int sqlite3PagerOpentemp(OsFile **pFd){
1601 0 : int cnt = 8;
1602 : int rc;
1603 : char zFile[SQLITE_TEMPNAME_SIZE];
1604 :
1605 : #ifdef SQLITE_TEST
1606 : sqlite3_opentemp_count++; /* Used for testing and analysis only */
1607 : #endif
1608 : do{
1609 0 : cnt--;
1610 0 : sqlite3OsTempFileName(zFile);
1611 0 : rc = sqlite3OsOpenExclusive(zFile, pFd, 1);
1612 : assert( rc!=SQLITE_OK || *pFd );
1613 0 : }while( cnt>0 && rc!=SQLITE_OK && rc!=SQLITE_NOMEM );
1614 0 : return rc;
1615 : }
1616 :
1617 : /*
1618 : ** Create a new page cache and put a pointer to the page cache in *ppPager.
1619 : ** The file to be cached need not exist. The file is not locked until
1620 : ** the first call to sqlite3PagerGet() and is only held open until the
1621 : ** last page is released using sqlite3PagerUnref().
1622 : **
1623 : ** If zFilename is NULL then a randomly-named temporary file is created
1624 : ** and used as the file to be cached. The file will be deleted
1625 : ** automatically when it is closed.
1626 : **
1627 : ** If zFilename is ":memory:" then all information is held in cache.
1628 : ** It is never written to disk. This can be used to implement an
1629 : ** in-memory database.
1630 : */
1631 : int sqlite3PagerOpen(
1632 : Pager **ppPager, /* Return the Pager structure here */
1633 : const char *zFilename, /* Name of the database file to open */
1634 : int nExtra, /* Extra bytes append to each in-memory page */
1635 : int flags /* flags controlling this file */
1636 130 : ){
1637 130 : Pager *pPager = 0;
1638 130 : char *zFullPathname = 0;
1639 : int nameLen; /* Compiler is wrong. This is always initialized before use */
1640 130 : OsFile *fd = 0;
1641 130 : int rc = SQLITE_OK;
1642 : int i;
1643 130 : int tempFile = 0;
1644 130 : int memDb = 0;
1645 130 : int readOnly = 0;
1646 130 : int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
1647 130 : int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
1648 : char zTemp[SQLITE_TEMPNAME_SIZE];
1649 : #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1650 : /* A malloc() cannot fail in sqlite3ThreadData() as one or more calls to
1651 : ** malloc() must have already been made by this thread before it gets
1652 : ** to this point. This means the ThreadData must have been allocated already
1653 : ** so that ThreadData.nAlloc can be set. It would be nice to assert
1654 : ** that ThreadData.nAlloc is non-zero, but alas this breaks test cases
1655 : ** written to invoke the pager directly.
1656 : */
1657 : ThreadData *pTsd = sqlite3ThreadData();
1658 : assert( pTsd );
1659 : #endif
1660 :
1661 : /* We used to test if malloc() had already failed before proceeding.
1662 : ** But the way this function is used in SQLite means that can never
1663 : ** happen. Furthermore, if the malloc-failed flag is already set,
1664 : ** either the call to sqliteStrDup() or sqliteMalloc() below will
1665 : ** fail shortly and SQLITE_NOMEM returned anyway.
1666 : */
1667 130 : *ppPager = 0;
1668 :
1669 : /* Open the pager file and set zFullPathname to point at malloc()ed
1670 : ** memory containing the complete filename (i.e. including the directory).
1671 : */
1672 260 : if( zFilename && zFilename[0] ){
1673 : #ifndef SQLITE_OMIT_MEMORYDB
1674 130 : if( strcmp(zFilename,":memory:")==0 ){
1675 117 : memDb = 1;
1676 117 : zFullPathname = sqliteStrDup("");
1677 : }else
1678 : #endif
1679 : {
1680 13 : zFullPathname = sqlite3OsFullPathname(zFilename);
1681 13 : if( zFullPathname ){
1682 13 : rc = sqlite3OsOpenReadWrite(zFullPathname, &fd, &readOnly);
1683 : assert( rc!=SQLITE_OK || fd );
1684 : }
1685 : }
1686 : }else{
1687 0 : rc = sqlite3PagerOpentemp(&fd);
1688 0 : sqlite3OsTempFileName(zTemp);
1689 0 : zFilename = zTemp;
1690 0 : zFullPathname = sqlite3OsFullPathname(zFilename);
1691 0 : if( rc==SQLITE_OK ){
1692 0 : tempFile = 1;
1693 : }
1694 : }
1695 :
1696 : /* Allocate the Pager structure. As part of the same allocation, allocate
1697 : ** space for the full paths of the file, directory and journal
1698 : ** (Pager.zFilename, Pager.zDirectory and Pager.zJournal).
1699 : */
1700 130 : if( zFullPathname ){
1701 130 : nameLen = strlen(zFullPathname);
1702 130 : pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
1703 130 : if( pPager && rc==SQLITE_OK ){
1704 130 : pPager->pTmpSpace = (char *)sqliteMallocRaw(SQLITE_DEFAULT_PAGE_SIZE);
1705 : }
1706 : }
1707 :
1708 :
1709 : /* If an error occured in either of the blocks above, free the memory
1710 : ** pointed to by zFullPathname, free the Pager structure and close the
1711 : ** file. Since the pager is not allocated there is no need to set
1712 : ** any Pager.errMask variables.
1713 : */
1714 130 : if( !pPager || !zFullPathname || !pPager->pTmpSpace || rc!=SQLITE_OK ){
1715 0 : sqlite3OsClose(&fd);
1716 0 : sqliteFree(zFullPathname);
1717 0 : sqliteFree(pPager);
1718 0 : return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
1719 : }
1720 :
1721 : PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(fd), zFullPathname);
1722 : IOTRACE(("OPEN %p %s\n", pPager, zFullPathname))
1723 130 : pPager->zFilename = (char*)&pPager[1];
1724 130 : pPager->zDirectory = &pPager->zFilename[nameLen+1];
1725 130 : pPager->zJournal = &pPager->zDirectory[nameLen+1];
1726 130 : strcpy(pPager->zFilename, zFullPathname);
1727 130 : strcpy(pPager->zDirectory, zFullPathname);
1728 :
1729 130 : for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
1730 130 : if( i>0 ) pPager->zDirectory[i-1] = 0;
1731 130 : strcpy(pPager->zJournal, zFullPathname);
1732 130 : sqliteFree(zFullPathname);
1733 130 : strcpy(&pPager->zJournal[nameLen], "-journal");
1734 130 : pPager->fd = fd;
1735 : /* pPager->journalOpen = 0; */
1736 130 : pPager->useJournal = useJournal && !memDb;
1737 130 : pPager->noReadlock = noReadlock && readOnly;
1738 : /* pPager->stmtOpen = 0; */
1739 : /* pPager->stmtInUse = 0; */
1740 : /* pPager->nRef = 0; */
1741 130 : pPager->dbSize = memDb-1;
1742 130 : pPager->pageSize = SQLITE_DEFAULT_PAGE_SIZE;
1743 : /* pPager->stmtSize = 0; */
1744 : /* pPager->stmtJSize = 0; */
1745 : /* pPager->nPage = 0; */
1746 : /* pPager->nMaxPage = 0; */
1747 130 : pPager->mxPage = 100;
1748 : assert( PAGER_UNLOCK==0 );
1749 : /* pPager->state = PAGER_UNLOCK; */
1750 : /* pPager->errMask = 0; */
1751 130 : pPager->tempFile = tempFile;
1752 : assert( tempFile==PAGER_LOCKINGMODE_NORMAL
1753 : || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
1754 : assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
1755 130 : pPager->exclusiveMode = tempFile;
1756 130 : pPager->memDb = memDb;
1757 130 : pPager->readOnly = readOnly;
1758 : /* pPager->needSync = 0; */
1759 130 : pPager->noSync = pPager->tempFile || !useJournal;
1760 130 : pPager->fullSync = (pPager->noSync?0:1);
1761 : /* pPager->pFirst = 0; */
1762 : /* pPager->pFirstSynced = 0; */
1763 : /* pPager->pLast = 0; */
1764 130 : pPager->nExtra = FORCE_ALIGNMENT(nExtra);
1765 : assert(fd||memDb);
1766 130 : if( !memDb ){
1767 13 : pPager->sectorSize = sqlite3OsSectorSize(fd);
1768 : }
1769 : /* pPager->pBusyHandler = 0; */
1770 : /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
1771 130 : *ppPager = pPager;
1772 : #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
1773 : pPager->pNext = pTsd->pPager;
1774 : pTsd->pPager = pPager;
1775 : #endif
1776 130 : return SQLITE_OK;
1777 : }
1778 :
1779 : /*
1780 : ** Set the busy handler function.
1781 : */
1782 130 : void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){
1783 130 : pPager->pBusyHandler = pBusyHandler;
1784 130 : }
1785 :
1786 : /*
1787 : ** Set the destructor for this pager. If not NULL, the destructor is called
1788 : ** when the reference count on each page reaches zero. The destructor can
1789 : ** be used to clean up information in the extra segment appended to each page.
1790 : **
1791 : ** The destructor is not called as a result sqlite3PagerClose().
1792 : ** Destructors are only called by sqlite3PagerUnref().
1793 : */
1794 130 : void sqlite3PagerSetDestructor(Pager *pPager, void (*xDesc)(DbPage*,int)){
1795 130 : pPager->xDestructor = xDesc;
1796 130 : }
1797 :
1798 : /*
1799 : ** Set the reinitializer for this pager. If not NULL, the reinitializer
1800 : ** is called when the content of a page in cache is restored to its original
1801 : ** value as a result of a rollback. The callback gives higher-level code
1802 : ** an opportunity to restore the EXTRA section to agree with the restored
1803 : ** page data.
1804 : */
1805 130 : void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*,int)){
1806 130 : pPager->xReiniter = xReinit;
1807 130 : }
1808 :
1809 : /*
1810 : ** Set the page size. Return the new size. If the suggest new page
1811 : ** size is inappropriate, then an alternative page size is selected
1812 : ** and returned.
1813 : */
1814 130 : int sqlite3PagerSetPagesize(Pager *pPager, int pageSize){
1815 : assert( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE );
1816 130 : if( !pPager->memDb && pPager->nRef==0 ){
1817 13 : pager_reset(pPager);
1818 13 : pPager->pageSize = pageSize;
1819 13 : pPager->pTmpSpace = sqlite3ReallocOrFree(pPager->pTmpSpace, pageSize);
1820 : }
1821 130 : return pPager->pageSize;
1822 : }
1823 :
/*
** Helpers that switch the simulated I/O error mechanism off and back
** on.  They bracket operations whose errors we do not care about.
**
** disable_simulated_io_errors() saves the pending-error counter and
** replaces it with -1; enable_simulated_io_errors() puts the saved
** value back.
**
** Unless -DSQLITE_TEST=1 is used, both are no-op macros that generate
** no code.
*/
#ifdef SQLITE_TEST
extern int sqlite3_io_error_pending;
extern int sqlite3_io_error_hit;
static int saved_cnt;      /* Counter value stashed while disabled */
void disable_simulated_io_errors(void){
  saved_cnt = sqlite3_io_error_pending;
  sqlite3_io_error_pending = -1;
}
void enable_simulated_io_errors(void){
  sqlite3_io_error_pending = saved_cnt;
}
#else
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#endif
1847 :
1848 : /*
1849 : ** Read the first N bytes from the beginning of the file into memory
1850 : ** that pDest points to.
1851 : **
1852 : ** No error checking is done. The rational for this is that this function
1853 : ** may be called even if the file does not exist or contain a header. In
1854 : ** these cases sqlite3OsRead() will return an error, to which the correct
1855 : ** response is to zero the memory at pDest and continue. A real IO error
1856 : ** will presumably recur and be picked up later (Todo: Think about this).
1857 : */
1858 130 : int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
1859 130 : int rc = SQLITE_OK;
1860 130 : memset(pDest, 0, N);
1861 130 : if( MEMDB==0 ){
1862 : disable_simulated_io_errors();
1863 13 : sqlite3OsSeek(pPager->fd, 0);
1864 : enable_simulated_io_errors();
1865 : IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
1866 13 : rc = sqlite3OsRead(pPager->fd, pDest, N);
1867 13 : if( rc==SQLITE_IOERR_SHORT_READ ){
1868 6 : rc = SQLITE_OK;
1869 : }
1870 : }
1871 130 : return rc;
1872 : }
1873 :
1874 : /*
1875 : ** Return the total number of pages in the disk file associated with
1876 : ** pPager.
1877 : **
1878 : ** If the PENDING_BYTE lies on the page directly after the end of the
1879 : ** file, then consider this page part of the file too. For example, if
1880 : ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
1881 : ** file is 4096 bytes, 5 is returned instead of 4.
1882 : */
1883 2009 : int sqlite3PagerPagecount(Pager *pPager){
1884 : i64 n;
1885 : int rc;
1886 : assert( pPager!=0 );
1887 2009 : if( pPager->errCode ){
1888 0 : return 0;
1889 : }
1890 2009 : if( pPager->dbSize>=0 ){
1891 1988 : n = pPager->dbSize;
1892 : } else {
1893 21 : if( (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){
1894 0 : pager_error(pPager, rc);
1895 0 : return 0;
1896 : }
1897 21 : if( n>0 && n<pPager->pageSize ){
1898 0 : n = 1;
1899 : }else{
1900 21 : n /= pPager->pageSize;
1901 : }
1902 21 : if( pPager->state!=PAGER_UNLOCK ){
1903 21 : pPager->dbSize = n;
1904 : }
1905 : }
1906 2009 : if( n==(PENDING_BYTE/pPager->pageSize) ){
1907 0 : n++;
1908 : }
1909 2009 : return n;
1910 : }
1911 :
1912 :
1913 : #ifndef SQLITE_OMIT_MEMORYDB
1914 : /*
1915 : ** Clear a PgHistory block
1916 : */
1917 412 : static void clearHistory(PgHistory *pHist){
1918 412 : sqliteFree(pHist->pOrig);
1919 412 : sqliteFree(pHist->pStmt);
1920 412 : pHist->pOrig = 0;
1921 412 : pHist->pStmt = 0;
1922 412 : }
1923 : #else
1924 : #define clearHistory(x)
1925 : #endif
1926 :
1927 : /*
1928 : ** Forward declaration
1929 : */
1930 : static int syncJournal(Pager*);
1931 :
1932 : /*
1933 : ** Unlink pPg from it's hash chain. Also set the page number to 0 to indicate
1934 : ** that the page is not part of any hash chain. This is required because the
1935 : ** sqlite3PagerMovepage() routine can leave a page in the
1936 : ** pNextFree/pPrevFree list that is not a part of any hash-chain.
1937 : */
1938 0 : static void unlinkHashChain(Pager *pPager, PgHdr *pPg){
1939 0 : if( pPg->pgno==0 ){
1940 : assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
1941 0 : return;
1942 : }
1943 0 : if( pPg->pNextHash ){
1944 0 : pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1945 : }
1946 0 : if( pPg->pPrevHash ){
1947 : assert( pPager->aHash[pPg->pgno & (pPager->nHash-1)]!=pPg );
1948 0 : pPg->pPrevHash->pNextHash = pPg->pNextHash;
1949 : }else{
1950 0 : int h = pPg->pgno & (pPager->nHash-1);
1951 0 : pPager->aHash[h] = pPg->pNextHash;
1952 : }
1953 0 : if( MEMDB ){
1954 0 : clearHistory(PGHDR_TO_HIST(pPg, pPager));
1955 : }
1956 0 : pPg->pgno = 0;
1957 0 : pPg->pNextHash = pPg->pPrevHash = 0;
1958 : }
1959 :
1960 : /*
1961 : ** Unlink a page from the free list (the list of all pages where nRef==0)
1962 : ** and from its hash collision chain.
1963 : */
1964 0 : static void unlinkPage(PgHdr *pPg){
1965 0 : Pager *pPager = pPg->pPager;
1966 :
1967 : /* Keep the pFirstSynced pointer pointing at the first synchronized page */
1968 0 : if( pPg==pPager->pFirstSynced ){
1969 0 : PgHdr *p = pPg->pNextFree;
1970 0 : while( p && p->needSync ){ p = p->pNextFree; }
1971 0 : pPager->pFirstSynced = p;
1972 : }
1973 :
1974 : /* Unlink from the freelist */
1975 0 : if( pPg->pPrevFree ){
1976 0 : pPg->pPrevFree->pNextFree = pPg->pNextFree;
1977 : }else{
1978 : assert( pPager->pFirst==pPg );
1979 0 : pPager->pFirst = pPg->pNextFree;
1980 : }
1981 0 : if( pPg->pNextFree ){
1982 0 : pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1983 : }else{
1984 : assert( pPager->pLast==pPg );
1985 0 : pPager->pLast = pPg->pPrevFree;
1986 : }
1987 0 : pPg->pNextFree = pPg->pPrevFree = 0;
1988 :
1989 : /* Unlink from the pgno hash table */
1990 0 : unlinkHashChain(pPager, pPg);
1991 0 : }
1992 :
1993 : /*
1994 : ** This routine is used to truncate the cache when a database
1995 : ** is truncated. Drop from the cache all pages whose pgno is
1996 : ** larger than pPager->dbSize and is unreferenced.
1997 : **
1998 : ** Referenced pages larger than pPager->dbSize are zeroed.
1999 : **
2000 : ** Actually, at the point this routine is called, it would be
2001 : ** an error to have a referenced page. But rather than delete
2002 : ** that page and guarantee a subsequent segfault, it seems better
2003 : ** to zero it and hope that we error out sanely.
2004 : */
2005 2 : static void pager_truncate_cache(Pager *pPager){
2006 : PgHdr *pPg;
2007 : PgHdr **ppPg;
2008 2 : int dbSize = pPager->dbSize;
2009 :
2010 2 : ppPg = &pPager->pAll;
2011 9 : while( (pPg = *ppPg)!=0 ){
2012 5 : if( pPg->pgno<=dbSize ){
2013 5 : ppPg = &pPg->pNextAll;
2014 0 : }else if( pPg->nRef>0 ){
2015 0 : memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
2016 0 : ppPg = &pPg->pNextAll;
2017 : }else{
2018 0 : *ppPg = pPg->pNextAll;
2019 : IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
2020 : PAGER_INCR(sqlite3_pager_pgfree_count);
2021 0 : unlinkPage(pPg);
2022 0 : makeClean(pPg);
2023 0 : sqliteFree(pPg);
2024 0 : pPager->nPage--;
2025 : }
2026 : }
2027 2 : }
2028 :
2029 : /*
2030 : ** Try to obtain a lock on a file. Invoke the busy callback if the lock
2031 : ** is currently not available. Repeat until the busy callback returns
2032 : ** false or until the lock succeeds.
2033 : **
2034 : ** Return SQLITE_OK on success and an error code if we cannot obtain
2035 : ** the lock.
2036 : */
2037 35 : static int pager_wait_on_lock(Pager *pPager, int locktype){
2038 : int rc;
2039 :
2040 : /* The OS lock values must be the same as the Pager lock values */
2041 : assert( PAGER_SHARED==SHARED_LOCK );
2042 : assert( PAGER_RESERVED==RESERVED_LOCK );
2043 : assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
2044 :
2045 : /* If the file is currently unlocked then the size must be unknown */
2046 : assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 || MEMDB );
2047 :
2048 35 : if( pPager->state>=locktype ){
2049 0 : rc = SQLITE_OK;
2050 : }else{
2051 : do {
2052 35 : rc = sqlite3OsLock(pPager->fd, locktype);
2053 35 : }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
2054 35 : if( rc==SQLITE_OK ){
2055 35 : pPager->state = locktype;
2056 : IOTRACE(("LOCK %p %d\n", pPager, locktype))
2057 : }
2058 : }
2059 35 : return rc;
2060 : }
2061 :
2062 : /*
2063 : ** Truncate the file to the number of pages specified.
2064 : */
2065 0 : int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
2066 : int rc;
2067 : assert( pPager->state>=PAGER_SHARED || MEMDB );
2068 0 : sqlite3PagerPagecount(pPager);
2069 0 : if( pPager->errCode ){
2070 0 : rc = pPager->errCode;
2071 0 : return rc;
2072 : }
2073 0 : if( nPage>=(unsigned)pPager->dbSize ){
2074 0 : return SQLITE_OK;
2075 : }
2076 0 : if( MEMDB ){
2077 0 : pPager->dbSize = nPage;
2078 0 : pager_truncate_cache(pPager);
2079 0 : return SQLITE_OK;
2080 : }
2081 0 : rc = syncJournal(pPager);
2082 0 : if( rc!=SQLITE_OK ){
2083 0 : return rc;
2084 : }
2085 :
2086 : /* Get an exclusive lock on the database before truncating. */
2087 0 : rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
2088 0 : if( rc!=SQLITE_OK ){
2089 0 : return rc;
2090 : }
2091 :
2092 0 : rc = pager_truncate(pPager, nPage);
2093 0 : return rc;
2094 : }
2095 :
2096 : /*
2097 : ** Shutdown the page cache. Free all memory and close all files.
2098 : **
2099 : ** If a transaction was in progress when this routine is called, that
2100 : ** transaction is rolled back. All outstanding pages are invalidated
2101 : ** and their memory is freed. Any attempt to use a page associated
2102 : ** with this page cache after this function returns will likely
2103 : ** result in a coredump.
2104 : **
2105 : ** This function always succeeds. If a transaction is active an attempt
2106 : ** is made to roll it back. If an error occurs during the rollback
2107 : ** a hot journal may be left in the filesystem but no error is returned
2108 : ** to the caller.
2109 : */
2110 129 : int sqlite3PagerClose(Pager *pPager){
2111 : #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
2112 : /* A malloc() cannot fail in sqlite3ThreadData() as one or more calls to
2113 : ** malloc() must have already been made by this thread before it gets
2114 : ** to this point. This means the ThreadData must have been allocated already
2115 : ** so that ThreadData.nAlloc can be set.
2116 : */
2117 : ThreadData *pTsd = sqlite3ThreadData();
2118 : assert( pPager );
2119 : assert( pTsd && pTsd->nAlloc );
2120 : #endif
2121 :
2122 : disable_simulated_io_errors();
2123 129 : pPager->errCode = 0;
2124 129 : pPager->exclusiveMode = 0;
2125 129 : pager_reset(pPager);
2126 129 : pagerUnlockAndRollback(pPager);
2127 : enable_simulated_io_errors();
2128 : PAGERTRACE2("CLOSE %d\n", PAGERID(pPager));
2129 : IOTRACE(("CLOSE %p\n", pPager))
2130 : assert( pPager->errCode || (pPager->journalOpen==0 && pPager->stmtOpen==0) );
2131 129 : if( pPager->journalOpen ){
2132 0 : sqlite3OsClose(&pPager->jfd);
2133 : }
2134 129 : sqliteFree(pPager->aInJournal);
2135 129 : if( pPager->stmtOpen ){
2136 0 : sqlite3OsClose(&pPager->stfd);
2137 : }
2138 129 : sqlite3OsClose(&pPager->fd);
2139 : /* Temp files are automatically deleted by the OS
2140 : ** if( pPager->tempFile ){
2141 : ** sqlite3OsDelete(pPager->zFilename);
2142 : ** }
2143 : */
2144 :
2145 : #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
2146 : /* Remove the pager from the linked list of pagers starting at
2147 : ** ThreadData.pPager if memory-management is enabled.
2148 : */
2149 : if( pPager==pTsd->pPager ){
2150 : pTsd->pPager = pPager->pNext;
2151 : }else{
2152 : Pager *pTmp;
2153 : for(pTmp = pTsd->pPager; pTmp->pNext!=pPager; pTmp=pTmp->pNext){}
2154 : pTmp->pNext = pPager->pNext;
2155 : }
2156 : #endif
2157 129 : sqliteFree(pPager->aHash);
2158 129 : sqliteFree(pPager->pTmpSpace);
2159 129 : sqliteFree(pPager);
2160 129 : return SQLITE_OK;
2161 : }
2162 :
2163 : /*
2164 : ** Return the page number for the given page data.
2165 : */
2166 0 : Pgno sqlite3PagerPagenumber(DbPage *p){
2167 0 : return p->pgno;
2168 : }
2169 :
2170 : /*
2171 : ** The page_ref() function increments the reference count for a page.
2172 : ** If the page is currently on the freelist (the reference count is zero) then
2173 : ** remove it from the freelist.
2174 : **
2175 : ** For non-test systems, page_ref() is a macro that calls _page_ref()
2176 : ** online of the reference count is zero. For test systems, page_ref()
2177 : ** is a real function so that we can set breakpoints and trace it.
2178 : */
2179 1096 : static void _page_ref(PgHdr *pPg){
2180 1096 : if( pPg->nRef==0 ){
2181 : /* The page is currently on the freelist. Remove it. */
2182 1096 : if( pPg==pPg->pPager->pFirstSynced ){
2183 675 : PgHdr *p = pPg->pNextFree;
2184 675 : while( p && p->needSync ){ p = p->pNextFree; }
2185 675 : pPg->pPager->pFirstSynced = p;
2186 : }
2187 1096 : if( pPg->pPrevFree ){
2188 387 : pPg->pPrevFree->pNextFree = pPg->pNextFree;
2189 : }else{
2190 709 : pPg->pPager->pFirst = pPg->pNextFree;
2191 : }
2192 1096 : if( pPg->pNextFree ){
2193 194 : pPg->pNextFree->pPrevFree = pPg->pPrevFree;
2194 : }else{
2195 902 : pPg->pPager->pLast = pPg->pPrevFree;
2196 : }
2197 1096 : pPg->pPager->nRef++;
2198 : }
2199 1096 : pPg->nRef++;
2200 : REFINFO(pPg);
2201 1096 : }
2202 : #ifdef SQLITE_DEBUG
2203 : static void page_ref(PgHdr *pPg){
2204 : if( pPg->nRef==0 ){
2205 : _page_ref(pPg);
2206 : }else{
2207 : pPg->nRef++;
2208 : REFINFO(pPg);
2209 : }
2210 : }
2211 : #else
2212 : # define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
2213 : #endif
2214 :
2215 : /*
2216 : ** Increment the reference count for a page. The input pointer is
2217 : ** a reference to the page data.
2218 : */
2219 0 : int sqlite3PagerRef(DbPage *pPg){
2220 0 : page_ref(pPg);
2221 0 : return SQLITE_OK;
2222 : }
2223 :
2224 : /*
2225 : ** Sync the journal. In other words, make sure all the pages that have
2226 : ** been written to the journal have actually reached the surface of the
2227 : ** disk. It is not safe to modify the original database file until after
2228 : ** the journal has been synced. If the original database is modified before
2229 : ** the journal is synced and a power failure occurs, the unsynced journal
2230 : ** data would be lost and we would be unable to completely rollback the
2231 : ** database changes. Database corruption would occur.
2232 : **
2233 : ** This routine also updates the nRec field in the header of the journal.
2234 : ** (See comments on the pager_playback() routine for additional information.)
2235 : ** If the sync mode is FULL, two syncs will occur. First the whole journal
2236 : ** is synced, then the nRec field is updated, then a second sync occurs.
2237 : **
2238 : ** For temporary databases, we do not care if we are able to rollback
2239 : ** after a power failure, so sync occurs.
2240 : **
2241 : ** This routine clears the needSync field of every page current held in
2242 : ** memory.
2243 : */
2244 14 : static int syncJournal(Pager *pPager){
2245 : PgHdr *pPg;
2246 14 : int rc = SQLITE_OK;
2247 :
2248 : /* Sync the journal before modifying the main database
2249 : ** (assuming there is a journal and it needs to be synced.)
2250 : */
2251 14 : if( pPager->needSync ){
2252 14 : if( !pPager->tempFile ){
2253 : assert( pPager->journalOpen );
2254 : /* assert( !pPager->noSync ); // noSync might be set if synchronous
2255 : ** was turned off after the transaction was started. Ticket #615 */
2256 : #ifndef NDEBUG
2257 : {
2258 : /* Make sure the pPager->nRec counter we are keeping agrees
2259 : ** with the nRec computed from the size of the journal file.
2260 : */
2261 : i64 jSz;
2262 : rc = sqlite3OsFileSize(pPager->jfd, &jSz);
2263 : if( rc!=0 ) return rc;
2264 : assert( pPager->journalOff==jSz );
2265 : }
2266 : #endif
2267 : {
2268 : /* Write the nRec value into the journal file header. If in
2269 : ** full-synchronous mode, sync the journal first. This ensures that
2270 : ** all data has really hit the disk before nRec is updated to mark
2271 : ** it as a candidate for rollback.
2272 : */
2273 14 : if( pPager->fullSync ){
2274 : PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
2275 : IOTRACE(("JSYNC %p\n", pPager))
2276 14 : rc = sqlite3OsSync(pPager->jfd, 0);
2277 14 : if( rc!=0 ) return rc;
2278 : }
2279 14 : rc = sqlite3OsSeek(pPager->jfd,
2280 : pPager->journalHdr + sizeof(aJournalMagic));
2281 14 : if( rc ) return rc;
2282 : IOTRACE(("JHDR %p %lld %d\n", pPager,
2283 : pPager->journalHdr + sizeof(aJournalMagic), 4))
2284 14 : rc = write32bits(pPager->jfd, pPager->nRec);
2285 14 : if( rc ) return rc;
2286 :
2287 14 : rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff);
2288 14 : if( rc ) return rc;
2289 : }
2290 : PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
2291 : IOTRACE(("JSYNC %d\n", pPager))
2292 14 : rc = sqlite3OsSync(pPager->jfd, pPager->full_fsync);
2293 14 : if( rc!=0 ) return rc;
2294 14 : pPager->journalStarted = 1;
2295 : }
2296 14 : pPager->needSync = 0;
2297 :
2298 : /* Erase the needSync flag from every page.
2299 : */
2300 42 : for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
2301 28 : pPg->needSync = 0;
2302 : }
2303 14 : pPager->pFirstSynced = pPager->pFirst;
2304 : }
2305 :
2306 : #ifndef NDEBUG
2307 : /* If the Pager.needSync flag is clear then the PgHdr.needSync
2308 : ** flag must also be clear for all pages. Verify that this
2309 : ** invariant is true.
2310 : */
2311 : else{
2312 : for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
2313 : assert( pPg->needSync==0 );
2314 : }
2315 : assert( pPager->pFirstSynced==pPager->pFirst );
2316 : }
2317 : #endif
2318 :
2319 14 : return rc;
2320 : }
2321 :
2322 : /*
2323 : ** Merge two lists of pages connected by pDirty and in pgno order.
2324 : ** Do not both fixing the pPrevDirty pointers.
2325 : */
2326 349 : static PgHdr *merge_pagelist(PgHdr *pA, PgHdr *pB){
2327 : PgHdr result, *pTail;
2328 349 : pTail = &result;
2329 711 : while( pA && pB ){
2330 13 : if( pA->pgno<pB->pgno ){
2331 7 : pTail->pDirty = pA;
2332 7 : pTail = pA;
2333 7 : pA = pA->pDirty;
2334 : }else{
2335 6 : pTail->pDirty = pB;
2336 6 : pTail = pB;
2337 6 : pB = pB->pDirty;
2338 : }
2339 : }
2340 349 : if( pA ){
2341 329 : pTail->pDirty = pA;
2342 20 : }else if( pB ){
2343 20 : pTail->pDirty = pB;
2344 : }else{
2345 0 : pTail->pDirty = 0;
2346 : }
2347 349 : return result.pDirty;
2348 : }
2349 :
2350 : /*
2351 : ** Sort the list of pages in accending order by pgno. Pages are
2352 : ** connected by pDirty pointers. The pPrevDirty pointers are
2353 : ** corrupted by this sort.
2354 : */
2355 : #define N_SORT_BUCKET_ALLOC 25
2356 : #define N_SORT_BUCKET 25
2357 : #ifdef SQLITE_TEST
2358 : int sqlite3_pager_n_sort_bucket = 0;
2359 : #undef N_SORT_BUCKET
2360 : #define N_SORT_BUCKET \
2361 : (sqlite3_pager_n_sort_bucket?sqlite3_pager_n_sort_bucket:N_SORT_BUCKET_ALLOC)
2362 : #endif
2363 14 : static PgHdr *sort_pagelist(PgHdr *pIn){
2364 : PgHdr *a[N_SORT_BUCKET_ALLOC], *p;
2365 : int i;
2366 14 : memset(a, 0, sizeof(a));
2367 55 : while( pIn ){
2368 27 : p = pIn;
2369 27 : pIn = p->pDirty;
2370 27 : p->pDirty = 0;
2371 40 : for(i=0; i<N_SORT_BUCKET-1; i++){
2372 40 : if( a[i]==0 ){
2373 27 : a[i] = p;
2374 27 : break;
2375 : }else{
2376 13 : p = merge_pagelist(a[i], p);
2377 13 : a[i] = 0;
2378 : }
2379 : }
2380 27 : if( i==N_SORT_BUCKET-1 ){
2381 : /* Coverage: To get here, there need to be 2^(N_SORT_BUCKET)
2382 : ** elements in the input list. This is possible, but impractical.
2383 : ** Testing this line is the point of global variable
2384 : ** sqlite3_pager_n_sort_bucket.
2385 : */
2386 0 : a[i] = merge_pagelist(a[i], p);
2387 : }
2388 : }
2389 14 : p = a[0];
2390 350 : for(i=1; i<N_SORT_BUCKET; i++){
2391 336 : p = merge_pagelist(p, a[i]);
2392 : }
2393 14 : return p;
2394 : }
2395 :
2396 : /*
2397 : ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
2398 : ** every one of those pages out to the database file and mark them all
2399 : ** as clean.
2400 : */
2401 14 : static int pager_write_pagelist(PgHdr *pList){
2402 : Pager *pPager;
2403 : int rc;
2404 :
2405 14 : if( pList==0 ) return SQLITE_OK;
2406 14 : pPager = pList->pPager;
2407 :
2408 : /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
2409 : ** database file. If there is already an EXCLUSIVE lock, the following
2410 : ** calls to sqlite3OsLock() are no-ops.
2411 : **
2412 : ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
2413 : ** through an intermediate state PENDING. A PENDING lock prevents new
2414 : ** readers from attaching to the database but is unsufficient for us to
2415 : ** write. The idea of a PENDING lock is to prevent new readers from
2416 : ** coming in while we wait for existing readers to clear.
2417 : **
2418 : ** While the pager is in the RESERVED state, the original database file
2419 : ** is unchanged and we can rollback without having to playback the
2420 : ** journal into the original database file. Once we transition to
2421 : ** EXCLUSIVE, it means the database file has been changed and any rollback
2422 : ** will require a journal playback.
2423 : */
2424 14 : rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
2425 14 : if( rc!=SQLITE_OK ){
2426 0 : return rc;
2427 : }
2428 :
2429 14 : pList = sort_pagelist(pList);
2430 55 : while( pList ){
2431 : assert( pList->dirty );
2432 27 : rc = sqlite3OsSeek(pPager->fd, (pList->pgno-1)*(i64)pPager->pageSize);
2433 27 : if( rc ) return rc;
2434 : /* If there are dirty pages in the page cache with page numbers greater
2435 : ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to
2436 : ** make the file smaller (presumably by auto-vacuum code). Do not write
2437 : ** any such pages to the file.
2438 : */
2439 27 : if( pList->pgno<=pPager->dbSize ){
2440 27 : char *pData = CODEC2(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
2441 : PAGERTRACE3("STORE %d page %d\n", PAGERID(pPager), pList->pgno);
2442 : IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno));
2443 27 : rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize);
2444 : PAGER_INCR(sqlite3_pager_writedb_count);
2445 : PAGER_INCR(pPager->nWrite);
2446 27 : if( pList->pgno==1 ){
2447 14 : memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
2448 : }
2449 : }
2450 : #ifndef NDEBUG
2451 : else{
2452 : PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
2453 : }
2454 : #endif
2455 27 : if( rc ) return rc;
2456 27 : pList->dirty = 0;
2457 : #ifdef SQLITE_CHECK_PAGES
2458 : pList->pageHash = pager_pagehash(pList);
2459 : #endif
2460 27 : pList = pList->pDirty;
2461 : }
2462 14 : return SQLITE_OK;
2463 : }
2464 :
2465 : /*
2466 : ** Collect every dirty page into a dirty list and
2467 : ** return a pointer to the head of that list. All pages are
2468 : ** collected even if they are still in use.
2469 : */
2470 209 : static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
2471 209 : return pPager->pDirty;
2472 : }
2473 :
2474 : /*
2475 : ** Return TRUE if there is a hot journal on the given pager.
2476 : ** A hot journal is one that needs to be played back.
2477 : **
2478 : ** If the current size of the database file is 0 but a journal file
2479 : ** exists, that is probably an old journal left over from a prior
2480 : ** database with the same name. Just delete the journal.
2481 : */
2482 21 : static int hasHotJournal(Pager *pPager){
2483 21 : if( !pPager->useJournal ) return 0;
2484 21 : if( !sqlite3OsFileExists(pPager->zJournal) ){
2485 21 : return 0;
2486 : }
2487 0 : if( sqlite3OsCheckReservedLock(pPager->fd) ){
2488 0 : return 0;
2489 : }
2490 0 : if( sqlite3PagerPagecount(pPager)==0 ){
2491 0 : sqlite3OsDelete(pPager->zJournal);
2492 0 : return 0;
2493 : }else{
2494 0 : return 1;
2495 : }
2496 : }
2497 :
2498 : /*
2499 : ** Try to find a page in the cache that can be recycled.
2500 : **
2501 : ** This routine may return SQLITE_IOERR, SQLITE_FULL or SQLITE_OK. It
2502 : ** does not set the pPager->errCode variable.
2503 : */
2504 0 : static int pager_recycle(Pager *pPager, int syncOk, PgHdr **ppPg){
2505 : PgHdr *pPg;
2506 0 : *ppPg = 0;
2507 :
2508 : assert(!MEMDB);
2509 :
2510 : /* Find a page to recycle. Try to locate a page that does not
2511 : ** require us to do an fsync() on the journal.
2512 : */
2513 0 : pPg = pPager->pFirstSynced;
2514 :
2515 : /* If we could not find a page that does not require an fsync()
2516 : ** on the journal file then fsync the journal file. This is a
2517 : ** very slow operation, so we work hard to avoid it. But sometimes
2518 : ** it can't be helped.
2519 : */
2520 0 : if( pPg==0 && pPager->pFirst && syncOk && !MEMDB){
2521 0 : int rc = syncJournal(pPager);
2522 0 : if( rc!=0 ){
2523 0 : return rc;
2524 : }
2525 0 : if( pPager->fullSync ){
2526 : /* If in full-sync mode, write a new journal header into the
2527 : ** journal file. This is done to avoid ever modifying a journal
2528 : ** header that is involved in the rollback of pages that have
2529 : ** already been written to the database (in case the header is
2530 : ** trashed when the nRec field is updated).
2531 : */
2532 0 : pPager->nRec = 0;
2533 : assert( pPager->journalOff > 0 );
2534 : assert( pPager->doNotSync==0 );
2535 0 : rc = writeJournalHdr(pPager);
2536 0 : if( rc!=0 ){
2537 0 : return rc;
2538 : }
2539 : }
2540 0 : pPg = pPager->pFirst;
2541 : }
2542 0 : if( pPg==0 ){
2543 0 : return SQLITE_OK;
2544 : }
2545 :
2546 : assert( pPg->nRef==0 );
2547 :
2548 : /* Write the page to the database file if it is dirty.
2549 : */
2550 0 : if( pPg->dirty ){
2551 : int rc;
2552 : assert( pPg->needSync==0 );
2553 0 : makeClean(pPg);
2554 0 : pPg->dirty = 1;
2555 0 : pPg->pDirty = 0;
2556 0 : rc = pager_write_pagelist( pPg );
2557 0 : if( rc!=SQLITE_OK ){
2558 0 : return rc;
2559 : }
2560 : }
2561 : assert( pPg->dirty==0 );
2562 :
2563 : /* If the page we are recycling is marked as alwaysRollback, then
2564 : ** set the global alwaysRollback flag, thus disabling the
2565 : ** sqlite3PagerDontRollback() optimization for the rest of this transaction.
2566 : ** It is necessary to do this because the page marked alwaysRollback
2567 : ** might be reloaded at a later time but at that point we won't remember
2568 : ** that is was marked alwaysRollback. This means that all pages must
2569 : ** be marked as alwaysRollback from here on out.
2570 : */
2571 0 : if( pPg->alwaysRollback ){
2572 : IOTRACE(("ALWAYS_ROLLBACK %p\n", pPager))
2573 0 : pPager->alwaysRollback = 1;
2574 : }
2575 :
2576 : /* Unlink the old page from the free list and the hash table
2577 : */
2578 0 : unlinkPage(pPg);
2579 : assert( pPg->pgno==0 );
2580 :
2581 0 : *ppPg = pPg;
2582 0 : return SQLITE_OK;
2583 : }
2584 :
/*
** This function is called to free superfluous dynamically allocated memory
** held by the pager system. Memory in use by any SQLite pager allocated
** by the current thread may be sqliteFree()ed.
**
** nReq is the number of bytes of memory required. Once at least this much
** has been released, no further pagers are visited. A negative value for
** nReq means free as much memory as possible. The return value is the
** total number of bytes of memory released.
**
** Errors hit while writing pages out are not returned to the caller;
** they are recorded on the affected pager with pager_error().
*/
#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
int sqlite3PagerReleaseMemory(int nReq){
  const ThreadData *pTsdro = sqlite3ThreadDataReadOnly();
  int nReleased = 0;
  int i;

  /* If the global mutex is held, this subroutine becomes a
  ** no-op; zero bytes of memory are freed.  This is because
  ** some of the code invoked by this function may also
  ** try to obtain the mutex, resulting in a deadlock.
  */
  if( sqlite3OsInMutex(0) ){
    return 0;
  }

  /* The outermost loop runs twice.  On the first pass (i==0) only memory
  ** that can be released without calling fsync() is considered.  On the
  ** second pass (i==1) pager_recycle() is permitted to call fsync(),
  ** which is of course much more expensive.  If the first pass already
  ** released nReq bytes, the inner loop condition makes the second pass
  ** do nothing.
  */
  for(i=0; i<=1; i++){

    /* Loop through all the SQLite pagers opened by the current thread. */
    Pager *pPager = pTsdro->pPager;
    for( ; pPager && (nReq<0 || nReleased<nReq); pPager=pPager->pNext){
      PgHdr *pPg;
      int rc;

      if( MEMDB ){
        continue;
      }

      /* For each pager, try to free as many pages as possible (without
      ** calling fsync() if this is the first iteration of the outermost
      ** loop).
      */
      while( SQLITE_OK==(rc = pager_recycle(pPager, i, &pPg)) && pPg) {
        /* We've found a page to free. At this point the page has been
        ** removed from the page hash-table, free-list and synced-list
        ** (pFirstSynced). It is still in the all pages (pAll) list.
        ** Remove it from this list before freeing.
        **
        ** Todo: Check the Pager.pStmt list to make sure this is Ok. It
        ** probably is though.
        */
        PgHdr *pTmp;
        assert( pPg );
        if( pPg==pPager->pAll ){
           pPager->pAll = pPg->pNextAll;
        }else{
          for( pTmp=pPager->pAll; pTmp->pNextAll!=pPg; pTmp=pTmp->pNextAll ){}
          pTmp->pNextAll = pPg->pNextAll;
        }
        nReleased += sqliteAllocSize(pPg);
        IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
        PAGER_INCR(sqlite3_pager_pgfree_count);
        sqliteFree(pPg);

        /* The page no longer exists, so it must no longer be counted.
        ** Pager.nPage is incremented only when pagerAllocatePage()
        ** mallocs a new PgHdr; without this decrement the cache would
        ** appear fuller than it really is after memory has been released.
        */
        pPager->nPage--;
      }

      if( rc!=SQLITE_OK ){
        /* An error occurred whilst writing to the database file or
        ** journal in pager_recycle(). The error is not returned to the
        ** caller of this function. Instead, set the Pager.errCode variable.
        ** The error will be returned to the user (or users, in the case
        ** of a shared pager cache) of the pager for which the error occurred.
        */
        assert( (rc&0xff)==SQLITE_IOERR || rc==SQLITE_FULL );
        assert( pPager->state>=PAGER_RESERVED );
        pager_error(pPager, rc);
      }
    }
  }

  return nReleased;
}
#endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
2672 :
2673 : /*
2674 : ** Read the content of page pPg out of the database file.
2675 : */
2676 7 : static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){
2677 : int rc;
2678 : assert( MEMDB==0 );
2679 7 : rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize);
2680 7 : if( rc==SQLITE_OK ){
2681 7 : rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg),
2682 : pPager->pageSize);
2683 : }
2684 : PAGER_INCR(sqlite3_pager_readdb_count);
2685 : PAGER_INCR(pPager->nRead);
2686 : IOTRACE(("PGIN %p %d\n", pPager, pgno));
2687 : PAGERTRACE3("FETCH %d page %d\n", PAGERID(pPager), pPg->pgno);
2688 7 : if( pgno==1 ){
2689 4 : memcpy(&pPager->dbFileVers, &((u8*)PGHDR_TO_DATA(pPg))[24],
2690 : sizeof(pPager->dbFileVers));
2691 : }
2692 : CODEC1(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
2693 7 : return rc;
2694 : }
2695 :
2696 :
2697 : /*
2698 : ** This function is called to obtain the shared lock required before
2699 : ** data may be read from the pager cache. If the shared lock has already
2700 : ** been obtained, this function is a no-op.
2701 : **
2702 : ** Immediately after obtaining the shared lock (if required), this function
2703 : ** checks for a hot-journal file. If one is found, an emergency rollback
2704 : ** is performed immediately.
2705 : */
2706 2114 : static int pagerSharedLock(Pager *pPager){
2707 2114 : int rc = SQLITE_OK;
2708 :
2709 2114 : if( pPager->state==PAGER_UNLOCK ){
2710 755 : if( !MEMDB ){
2711 : assert( pPager->nRef==0 );
2712 21 : if( !pPager->noReadlock ){
2713 21 : rc = pager_wait_on_lock(pPager, SHARED_LOCK);
2714 21 : if( rc!=SQLITE_OK ){
2715 0 : return pager_error(pPager, rc);
2716 : }
2717 : assert( pPager->state>=SHARED_LOCK );
2718 : }
2719 :
2720 : /* If a journal file exists, and there is no RESERVED lock on the
2721 : ** database file, then it either needs to be played back or deleted.
2722 : */
2723 21 : if( hasHotJournal(pPager) ){
2724 : /* Get an EXCLUSIVE lock on the database file. At this point it is
2725 : ** important that a RESERVED lock is not obtained on the way to the
2726 : ** EXCLUSIVE lock. If it were, another process might open the
2727 : ** database file, detect the RESERVED lock, and conclude that the
2728 : ** database is safe to read while this process is still rolling it
2729 : ** back.
2730 : **
2731 : ** Because the intermediate RESERVED lock is not requested, the
2732 : ** second process will get to this point in the code and fail to
2733 : ** obtain it's own EXCLUSIVE lock on the database file.
2734 : */
2735 0 : rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
2736 0 : if( rc!=SQLITE_OK ){
2737 0 : pager_unlock(pPager);
2738 0 : return pager_error(pPager, rc);
2739 : }
2740 0 : pPager->state = PAGER_EXCLUSIVE;
2741 :
2742 : /* Open the journal for reading only. Return SQLITE_BUSY if
2743 : ** we are unable to open the journal file.
2744 : **
2745 : ** The journal file does not need to be locked itself. The
2746 : ** journal file is never open unless the main database file holds
2747 : ** a write lock, so there is never any chance of two or more
2748 : ** processes opening the journal at the same time.
2749 : **
2750 : ** Open the journal for read/write access. This is because in
2751 : ** exclusive-access mode the file descriptor will be kept open and
2752 : ** possibly used for a transaction later on. On some systems, the
2753 : ** OsTruncate() call used in exclusive-access mode also requires
2754 : ** a read/write file handle.
2755 : */
2756 0 : rc = SQLITE_BUSY;
2757 0 : if( sqlite3OsFileExists(pPager->zJournal) ){
2758 : int ro;
2759 : assert( !pPager->tempFile );
2760 0 : rc = sqlite3OsOpenReadWrite(pPager->zJournal, &pPager->jfd, &ro);
2761 : assert( rc!=SQLITE_OK || pPager->jfd );
2762 0 : if( ro ){
2763 0 : rc = SQLITE_BUSY;
2764 0 : sqlite3OsClose(&pPager->jfd);
2765 : }
2766 : }
2767 0 : if( rc!=SQLITE_OK ){
2768 0 : pager_unlock(pPager);
2769 0 : return SQLITE_BUSY;
2770 : }
2771 0 : pPager->journalOpen = 1;
2772 0 : pPager->journalStarted = 0;
2773 0 : pPager->journalOff = 0;
2774 0 : pPager->setMaster = 0;
2775 0 : pPager->journalHdr = 0;
2776 :
2777 : /* Playback and delete the journal. Drop the database write
2778 : ** lock and reacquire the read lock.
2779 : */
2780 0 : rc = pager_playback(pPager, 1);
2781 0 : if( rc!=SQLITE_OK ){
2782 0 : return pager_error(pPager, rc);
2783 : }
2784 : assert(pPager->state==PAGER_SHARED ||
2785 : (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
2786 : );
2787 : }
2788 :
2789 21 : if( pPager->pAll ){
2790 : /* The shared-lock has just been acquired on the database file
2791 : ** and there are already pages in the cache (from a previous
2792 : ** read or write transaction). Check to see if the database
2793 : ** has been modified. If the database has changed, flush the
2794 : ** cache.
2795 : **
2796 : ** Database changes is detected by looking at 15 bytes beginning
2797 : ** at offset 24 into the file. The first 4 of these 16 bytes are
2798 : ** a 32-bit counter that is incremented with each change. The
2799 : ** other bytes change randomly with each file change when
2800 : ** a codec is in use.
2801 : **
2802 : ** There is a vanishingly small chance that a change will not be
2803 : ** deteched. The chance of an undetected change is so small that
2804 : ** it can be neglected.
2805 : */
2806 : char dbFileVers[sizeof(pPager->dbFileVers)];
2807 17 : sqlite3PagerPagecount(pPager);
2808 :
2809 17 : if( pPager->errCode ){
2810 0 : return pPager->errCode;
2811 : }
2812 :
2813 17 : if( pPager->dbSize>0 ){
2814 17 : rc = sqlite3OsSeek(pPager->fd, 24);
2815 17 : if( rc!=SQLITE_OK ){
2816 0 : return rc;
2817 : }
2818 17 : rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers));
2819 17 : if( rc!=SQLITE_OK ){
2820 0 : return rc;
2821 : }
2822 : }else{
2823 0 : memset(dbFileVers, 0, sizeof(dbFileVers));
2824 : }
2825 :
2826 17 : if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
2827 0 : pager_reset(pPager);
2828 : }
2829 : }
2830 : }
2831 : assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED );
2832 755 : if( pPager->state==PAGER_UNLOCK ){
2833 734 : pPager->state = PAGER_SHARED;
2834 : }
2835 : }
2836 :
2837 2114 : return rc;
2838 : }
2839 :
2840 : /*
2841 : ** Allocate a PgHdr object. Either create a new one or reuse
2842 : ** an existing one that is not otherwise in use.
2843 : **
2844 : ** A new PgHdr structure is created if any of the following are
2845 : ** true:
2846 : **
2847 : ** (1) We have not exceeded our maximum allocated cache size
2848 : ** as set by the "PRAGMA cache_size" command.
2849 : **
2850 : ** (2) There are no unused PgHdr objects available at this time.
2851 : **
2852 : ** (3) This is an in-memory database.
2853 : **
2854 : ** (4) There are no PgHdr objects that do not require a journal
2855 : ** file sync and a sync of the journal file is currently
2856 : ** prohibited.
2857 : **
2858 : ** Otherwise, reuse an existing PgHdr. In other words, reuse an
2859 : ** existing PgHdr if all of the following are true:
2860 : **
2861 : ** (1) We have reached or exceeded the maximum cache size
2862 : ** allowed by "PRAGMA cache_size".
2863 : **
2864 : ** (2) There is a PgHdr available with PgHdr->nRef==0
2865 : **
2866 : ** (3) We are not in an in-memory database
2867 : **
2868 : ** (4) Either there is an available PgHdr that does not need
2869 : ** to be synced to disk or else disk syncing is currently
2870 : ** allowed.
2871 : */
2872 204 : static int pagerAllocatePage(Pager *pPager, PgHdr **ppPg){
2873 204 : int rc = SQLITE_OK;
2874 : PgHdr *pPg;
2875 :
2876 : /* Create a new PgHdr if any of the four conditions defined
2877 : ** above is met: */
2878 408 : if( pPager->nPage<pPager->mxPage
2879 : || pPager->pFirst==0
2880 : || MEMDB
2881 : || (pPager->pFirstSynced==0 && pPager->doNotSync)
2882 : ){
2883 204 : if( pPager->nPage>=pPager->nHash ){
2884 108 : pager_resize_hash_table(pPager,
2885 : pPager->nHash<256 ? 256 : pPager->nHash*2);
2886 108 : if( pPager->nHash==0 ){
2887 0 : rc = SQLITE_NOMEM;
2888 0 : goto pager_allocate_out;
2889 : }
2890 : }
2891 204 : pPg = sqliteMallocRaw( sizeof(*pPg) + pPager->pageSize
2892 : + sizeof(u32) + pPager->nExtra
2893 : + MEMDB*sizeof(PgHistory) );
2894 204 : if( pPg==0 ){
2895 0 : rc = SQLITE_NOMEM;
2896 0 : goto pager_allocate_out;
2897 : }
2898 204 : memset(pPg, 0, sizeof(*pPg));
2899 204 : if( MEMDB ){
2900 196 : memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory));
2901 : }
2902 204 : pPg->pPager = pPager;
2903 204 : pPg->pNextAll = pPager->pAll;
2904 204 : pPager->pAll = pPg;
2905 204 : pPager->nPage++;
2906 204 : if( pPager->nPage>pPager->nMaxPage ){
2907 : assert( pPager->nMaxPage==(pPager->nPage-1) );
2908 204 : pPager->nMaxPage++;
2909 : }
2910 : }else{
2911 : /* Recycle an existing page with a zero ref-count. */
2912 0 : rc = pager_recycle(pPager, 1, &pPg);
2913 0 : if( rc!=SQLITE_OK ){
2914 0 : goto pager_allocate_out;
2915 : }
2916 : assert( pPager->state>=SHARED_LOCK );
2917 : assert(pPg);
2918 : }
2919 204 : *ppPg = pPg;
2920 :
2921 204 : pager_allocate_out:
2922 204 : return rc;
2923 : }
2924 :
2925 : /*
2926 : ** Acquire a page.
2927 : **
2928 : ** A read lock on the disk file is obtained when the first page is acquired.
2929 : ** This read lock is dropped when the last page is released.
2930 : **
2931 : ** A _get works for any page number greater than 0. If the database
2932 : ** file is smaller than the requested page, then no actual disk
2933 : ** read occurs and the memory image of the page is initialized to
2934 : ** all zeros. The extra data appended to a page is always initialized
2935 : ** to zeros the first time a page is loaded into memory.
2936 : **
2937 : ** The acquisition might fail for several reasons. In all cases,
2938 : ** an appropriate error code is returned and *ppPage is set to NULL.
2939 : **
2940 : ** See also sqlite3PagerLookup(). Both this routine and _lookup() attempt
2941 : ** to find a page in the in-memory cache first. If the page is not already
2942 : ** in memory, this routine goes to disk to read it in whereas _lookup()
2943 : ** just returns 0. This routine acquires a read-lock the first time it
2944 : ** has to go to disk, and could also playback an old journal if necessary.
2945 : ** Since _lookup() never goes to disk, it never has to deal with locks
2946 : ** or journal files.
2947 : **
2948 : ** If noContent is false, the page contents are actually read from disk.
2949 : ** If noContent is true, it means that we do not care about the contents
2950 : ** of the page at this time, so do not do a disk read. Just fill in the
2951 : ** page content with zeros. But mark the fact that we have not read the
2952 : ** content by setting the PgHdr.needRead flag. Later on, if
2953 : ** sqlite3PagerWrite() is called on this page, that means that the
2954 : ** content is needed and the disk read should occur at that point.
2955 : */
2956 : int sqlite3PagerAcquire(
2957 : Pager *pPager, /* The pager open on the database file */
2958 : Pgno pgno, /* Page number to fetch */
2959 : DbPage **ppPage, /* Write a pointer to the page here */
2960 : int noContent /* Do not bother reading content from disk if true */
2961 2114 : ){
2962 : PgHdr *pPg;
2963 : int rc;
2964 :
2965 : assert( pPager->state==PAGER_UNLOCK || pPager->nRef>0 || pgno==1 );
2966 :
2967 : /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
2968 : ** number greater than this, or zero, is requested.
2969 : */
2970 2114 : if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
2971 0 : return SQLITE_CORRUPT_BKPT;
2972 : }
2973 :
2974 : /* Make sure we have not hit any critical errors.
2975 : */
2976 : assert( pPager!=0 );
2977 2114 : *ppPage = 0;
2978 2114 : if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
2979 0 : return pPager->errCode;
2980 : }
2981 :
2982 : /* If this is the first page accessed, then get a SHARED lock
2983 : ** on the database file. pagerSharedLock() is a no-op if
2984 : ** a database lock is already held.
2985 : */
2986 2114 : rc = pagerSharedLock(pPager);
2987 2114 : if( rc!=SQLITE_OK ){
2988 0 : return rc;
2989 : }
2990 : assert( pPager->state!=PAGER_UNLOCK );
2991 :
2992 2114 : pPg = pager_lookup(pPager, pgno);
2993 2114 : if( pPg==0 ){
2994 : /* The requested page is not in the page cache. */
2995 : int nMax;
2996 : int h;
2997 : PAGER_INCR(pPager->nMiss);
2998 204 : rc = pagerAllocatePage(pPager, &pPg);
2999 204 : if( rc!=SQLITE_OK ){
3000 0 : return rc;
3001 : }
3002 :
3003 204 : pPg->pgno = pgno;
3004 : assert( !MEMDB || pgno>pPager->stmtSize );
3005 207 : if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
3006 : sqlite3CheckMemory(pPager->aInJournal, pgno/8);
3007 : assert( pPager->journalOpen );
3008 3 : pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
3009 3 : pPg->needSync = 0;
3010 : }else{
3011 201 : pPg->inJournal = 0;
3012 201 : pPg->needSync = 0;
3013 : }
3014 :
3015 204 : makeClean(pPg);
3016 204 : pPg->nRef = 1;
3017 : REFINFO(pPg);
3018 :
3019 204 : pPager->nRef++;
3020 204 : if( pPager->nExtra>0 ){
3021 204 : memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
3022 : }
3023 204 : nMax = sqlite3PagerPagecount(pPager);
3024 204 : if( pPager->errCode ){
3025 0 : sqlite3PagerUnref(pPg);
3026 0 : rc = pPager->errCode;
3027 0 : return rc;
3028 : }
3029 :
3030 : /* Populate the page with data, either by reading from the database
3031 : ** file, or by setting the entire page to zero.
3032 : */
3033 401 : if( nMax<(int)pgno || MEMDB || (noContent && !pPager->alwaysRollback) ){
3034 197 : memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
3035 197 : pPg->needRead = noContent && !pPager->alwaysRollback;
3036 : IOTRACE(("ZERO %p %d\n", pPager, pgno));
3037 : }else{
3038 7 : rc = readDbPage(pPager, pPg, pgno);
3039 7 : if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
3040 0 : pPg->pgno = 0;
3041 0 : sqlite3PagerUnref(pPg);
3042 0 : return rc;
3043 : }
3044 : }
3045 :
3046 : /* Link the page into the page hash table */
3047 204 : h = pgno & (pPager->nHash-1);
3048 : assert( pgno!=0 );
3049 204 : pPg->pNextHash = pPager->aHash[h];
3050 204 : pPager->aHash[h] = pPg;
3051 204 : if( pPg->pNextHash ){
3052 : assert( pPg->pNextHash->pPrevHash==0 );
3053 0 : pPg->pNextHash->pPrevHash = pPg;
3054 : }
3055 :
3056 : #ifdef SQLITE_CHECK_PAGES
3057 : pPg->pageHash = pager_pagehash(pPg);
3058 : #endif
3059 : }else{
3060 : /* The requested page is in the page cache. */
3061 : assert(pPager->nRef>0 || pgno==1);
3062 : PAGER_INCR(pPager->nHit);
3063 1910 : page_ref(pPg);
3064 : }
3065 2114 : *ppPage = pPg;
3066 2114 : return SQLITE_OK;
3067 : }
3068 :
3069 : /*
3070 : ** Acquire a page if it is already in the in-memory cache. Do
3071 : ** not read the page from disk. Return a pointer to the page,
3072 : ** or 0 if the page is not in cache.
3073 : **
3074 : ** See also sqlite3PagerGet(). The difference between this routine
3075 : ** and sqlite3PagerGet() is that _get() will go to the disk and read
3076 : ** in the page if the page is not already in cache. This routine
3077 : ** returns NULL if the page is not in cache or if a disk I/O error
3078 : ** has ever happened.
3079 : */
3080 0 : DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
3081 : PgHdr *pPg;
3082 :
3083 : assert( pPager!=0 );
3084 : assert( pgno!=0 );
3085 :
3086 0 : if( pPager->state==PAGER_UNLOCK ){
3087 : assert( !pPager->pAll || pPager->exclusiveMode );
3088 0 : return 0;
3089 : }
3090 0 : if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
3091 0 : return 0;
3092 : }
3093 0 : pPg = pager_lookup(pPager, pgno);
3094 0 : if( pPg==0 ) return 0;
3095 0 : page_ref(pPg);
3096 0 : return pPg;
3097 : }
3098 :
3099 : /*
3100 : ** Release a page.
3101 : **
3102 : ** If the number of references to the page drop to zero, then the
3103 : ** page is added to the LRU list. When all references to all pages
3104 : ** are released, a rollback occurs and the lock on the database is
3105 : ** removed.
3106 : */
3107 2112 : int sqlite3PagerUnref(DbPage *pPg){
3108 :
3109 : /* Decrement the reference count for this page
3110 : */
3111 : assert( pPg->nRef>0 );
3112 2112 : pPg->nRef--;
3113 : REFINFO(pPg);
3114 :
3115 : CHECK_PAGE(pPg);
3116 :
3117 : /* When the number of references to a page reach 0, call the
3118 : ** destructor and add the page to the freelist.
3119 : */
3120 2112 : if( pPg->nRef==0 ){
3121 : Pager *pPager;
3122 1298 : pPager = pPg->pPager;
3123 1298 : pPg->pNextFree = 0;
3124 1298 : pPg->pPrevFree = pPager->pLast;
3125 1298 : pPager->pLast = pPg;
3126 1298 : if( pPg->pPrevFree ){
3127 644 : pPg->pPrevFree->pNextFree = pPg;
3128 : }else{
3129 654 : pPager->pFirst = pPg;
3130 : }
3131 1298 : if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
3132 639 : pPager->pFirstSynced = pPg;
3133 : }
3134 1298 : if( pPager->xDestructor ){
3135 1298 : pPager->xDestructor(pPg, pPager->pageSize);
3136 : }
3137 :
3138 : /* When all pages reach the freelist, drop the read lock from
3139 : ** the database file.
3140 : */
3141 1298 : pPager->nRef--;
3142 : assert( pPager->nRef>=0 );
3143 1298 : if( pPager->nRef==0 && (!pPager->exclusiveMode || pPager->journalOff>0) ){
3144 754 : pagerUnlockAndRollback(pPager);
3145 : }
3146 : }
3147 2112 : return SQLITE_OK;
3148 : }
3149 :
3150 : /*
3151 : ** Create a journal file for pPager. There should already be a RESERVED
3152 : ** or EXCLUSIVE lock on the database file when this routine is called.
3153 : **
3154 : ** Return SQLITE_OK if everything. Return an error code and release the
3155 : ** write lock if anything goes wrong.
3156 : */
3157 15 : static int pager_open_journal(Pager *pPager){
3158 : int rc;
3159 : assert( !MEMDB );
3160 : assert( pPager->state>=PAGER_RESERVED );
3161 : assert( pPager->journalOpen==0 );
3162 : assert( pPager->useJournal );
3163 : assert( pPager->aInJournal==0 );
3164 15 : sqlite3PagerPagecount(pPager);
3165 15 : pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
3166 15 : if( pPager->aInJournal==0 ){
3167 0 : rc = SQLITE_NOMEM;
3168 0 : goto failed_to_open_journal;
3169 : }
3170 15 : rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,
3171 : pPager->tempFile);
3172 : assert( rc!=SQLITE_OK || pPager->jfd );
3173 15 : pPager->journalOff = 0;
3174 15 : pPager->setMaster = 0;
3175 15 : pPager->journalHdr = 0;
3176 15 : if( rc!=SQLITE_OK ){
3177 0 : if( rc==SQLITE_NOMEM ){
3178 0 : sqlite3OsDelete(pPager->zJournal);
3179 : }
3180 0 : goto failed_to_open_journal;
3181 : }
3182 15 : sqlite3OsSetFullSync(pPager->jfd, pPager->full_fsync);
3183 15 : sqlite3OsSetFullSync(pPager->fd, pPager->full_fsync);
3184 15 : sqlite3OsOpenDirectory(pPager->jfd, pPager->zDirectory);
3185 15 : pPager->journalOpen = 1;
3186 15 : pPager->journalStarted = 0;
3187 15 : pPager->needSync = 0;
3188 15 : pPager->alwaysRollback = 0;
3189 15 : pPager->nRec = 0;
3190 15 : if( pPager->errCode ){
3191 0 : rc = pPager->errCode;
3192 0 : goto failed_to_open_journal;
3193 : }
3194 15 : pPager->origDbSize = pPager->dbSize;
3195 :
3196 15 : rc = writeJournalHdr(pPager);
3197 :
3198 15 : if( pPager->stmtAutoopen && rc==SQLITE_OK ){
3199 0 : rc = sqlite3PagerStmtBegin(pPager);
3200 : }
3201 15 : if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
3202 0 : rc = pager_end_transaction(pPager);
3203 0 : if( rc==SQLITE_OK ){
3204 0 : rc = SQLITE_FULL;
3205 : }
3206 : }
3207 15 : return rc;
3208 :
3209 0 : failed_to_open_journal:
3210 0 : sqliteFree(pPager->aInJournal);
3211 0 : pPager->aInJournal = 0;
3212 0 : return rc;
3213 : }
3214 :
3215 : /*
3216 : ** Acquire a write-lock on the database. The lock is removed when
3217 : ** the any of the following happen:
3218 : **
3219 : ** * sqlite3PagerCommitPhaseTwo() is called.
3220 : ** * sqlite3PagerRollback() is called.
3221 : ** * sqlite3PagerClose() is called.
3222 : ** * sqlite3PagerUnref() is called to on every outstanding page.
3223 : **
3224 : ** The first parameter to this routine is a pointer to any open page of the
3225 : ** database file. Nothing changes about the page - it is used merely to
3226 : ** acquire a pointer to the Pager structure and as proof that there is
3227 : ** already a read-lock on the database.
3228 : **
3229 : ** The second parameter indicates how much space in bytes to reserve for a
3230 : ** master journal file-name at the start of the journal when it is created.
3231 : **
3232 : ** A journal file is opened if this is not a temporary file. For temporary
3233 : ** files, the opening of the journal file is deferred until there is an
3234 : ** actual need to write to the journal.
3235 : **
3236 : ** If the database is already reserved for writing, this routine is a no-op.
3237 : **
3238 : ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
3239 : ** immediately instead of waiting until we try to flush the cache. The
3240 : ** exFlag is ignored if a transaction is already active.
3241 : */
3242 651 : int sqlite3PagerBegin(DbPage *pPg, int exFlag){
3243 651 : Pager *pPager = pPg->pPager;
3244 651 : int rc = SQLITE_OK;
3245 : assert( pPg->nRef>0 );
3246 : assert( pPager->state!=PAGER_UNLOCK );
3247 651 : if( pPager->state==PAGER_SHARED ){
3248 : assert( pPager->aInJournal==0 );
3249 211 : if( MEMDB ){
3250 196 : pPager->state = PAGER_EXCLUSIVE;
3251 196 : pPager->origDbSize = pPager->dbSize;
3252 : }else{
3253 15 : rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
3254 15 : if( rc==SQLITE_OK ){
3255 15 : pPager->state = PAGER_RESERVED;
3256 15 : if( exFlag ){
3257 0 : rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
3258 : }
3259 : }
3260 15 : if( rc!=SQLITE_OK ){
3261 0 : return rc;
3262 : }
3263 15 : pPager->dirtyCache = 0;
3264 : PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager));
3265 15 : if( pPager->useJournal && !pPager->tempFile ){
3266 15 : rc = pager_open_journal(pPager);
3267 : }
3268 : }
3269 440 : }else if( pPager->journalOpen && pPager->journalOff==0 ){
3270 : /* This happens when the pager was in exclusive-access mode last
3271 : ** time a (read or write) transaction was successfully concluded
3272 : ** by this connection. Instead of deleting the journal file it was
3273 : ** kept open and truncated to 0 bytes.
3274 : */
3275 : assert( pPager->nRec==0 );
3276 : assert( pPager->origDbSize==0 );
3277 : assert( pPager->aInJournal==0 );
3278 0 : sqlite3PagerPagecount(pPager);
3279 0 : pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
3280 0 : if( !pPager->aInJournal ){
3281 0 : rc = SQLITE_NOMEM;
3282 : }else{
3283 0 : pPager->origDbSize = pPager->dbSize;
3284 0 : rc = writeJournalHdr(pPager);
3285 : }
3286 : }
3287 : assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK );
3288 651 : return rc;
3289 : }
3290 :
3291 : /*
3292 : ** Make a page dirty. Set its dirty flag and add it to the dirty
3293 : ** page list.
3294 : */
3295 767 : static void makeDirty(PgHdr *pPg){
3296 767 : if( pPg->dirty==0 ){
3297 440 : Pager *pPager = pPg->pPager;
3298 440 : pPg->dirty = 1;
3299 440 : pPg->pDirty = pPager->pDirty;
3300 440 : if( pPager->pDirty ){
3301 229 : pPager->pDirty->pPrevDirty = pPg;
3302 : }
3303 440 : pPg->pPrevDirty = 0;
3304 440 : pPager->pDirty = pPg;
3305 : }
3306 767 : }
3307 :
3308 : /*
3309 : ** Make a page clean. Clear its dirty bit and remove it from the
3310 : ** dirty page list.
3311 : */
3312 204 : static void makeClean(PgHdr *pPg){
3313 204 : if( pPg->dirty ){
3314 0 : pPg->dirty = 0;
3315 0 : if( pPg->pDirty ){
3316 0 : pPg->pDirty->pPrevDirty = pPg->pPrevDirty;
3317 : }
3318 0 : if( pPg->pPrevDirty ){
3319 0 : pPg->pPrevDirty->pDirty = pPg->pDirty;
3320 : }else{
3321 0 : pPg->pPager->pDirty = pPg->pDirty;
3322 : }
3323 : }
3324 204 : }
3325 :
3326 :
/*
** Mark a data page as writeable.  The page is written into the journal
** if it is not there already.  This routine must be called before making
** changes to a page.
**
** Parameters:
**   pPg   Header of the page that is about to be modified.
**
** Return value:
**   SQLITE_OK on success.  Otherwise one of:
**     - pPager->errCode, if the pager already carries an error;
**     - SQLITE_PERM, if the pager is read-only;
**     - whatever readDbPage(), sqlite3PagerBegin(), pager_open_journal()
**       or sqlite3OsWrite() reported.
**   The caller must not change any page data unless SQLITE_OK is returned.
**
** Outline of the work done below:
**   1. Bail out on a pre-existing error or a read-only pager.
**   2. If the page content was never read (needRead), read it now so
**      that the original content is available for journaling.
**   3. Mark the page dirty.
**   4. Unless the page is already journaled (and, when a statement is
**      in use, already in the statement journal), make sure a write
**      transaction and the journal exist, then record the page in the
**      rollback journal and/or the statement journal.
**   5. Grow pPager->dbSize if this page lies beyond the current end.
**
** NOTE(review): an earlier version of this comment said that a failed
** journal write returns SQLITE_FULL "and does an immediate rollback".
** The code below only returns the error from sqlite3OsWrite(); no
** rollback is performed here (the in-line comment at the write site says
** the layer above rolls back).
*/
static int pager_write(PgHdr *pPg){
  void *pData = PGHDR_TO_DATA(pPg);
  Pager *pPager = pPg->pPager;
  int rc = SQLITE_OK;

  /* Check for errors
  */
  if( pPager->errCode ){
    return pPager->errCode;
  }
  if( pPager->readOnly ){
    return SQLITE_PERM;
  }

  assert( !pPager->setMaster );

  CHECK_PAGE(pPg);

  /* If this page was previously acquired with noContent==1, that means
  ** we didn't really read in the content of the page.  This can happen
  ** (for example) when the page is being moved to the freelist.  But
  ** now we are (perhaps) moving the page off of the freelist for
  ** reuse and we need to know its original content so that content
  ** can be stored in the rollback journal.  So do the read at this
  ** time.
  */
  if( pPg->needRead ){
    rc = readDbPage(pPager, pPg, pPg->pgno);
    if( rc==SQLITE_OK ){
      pPg->needRead = 0;
    }else{
      return rc;
    }
  }

  /* Mark the page as dirty.  If the page has already been written
  ** to the journal then we can return right away.
  */
  makeDirty(pPg);
  if( pPg->inJournal && (pageInStatement(pPg) || pPager->stmtInUse==0) ){
    /* Nothing to journal: only note that the cache holds changes. */
    pPager->dirtyCache = 1;
  }else{

    /* If we get this far, it means that the page needs to be
    ** written to the transaction journal or the checkpoint journal
    ** or both.
    **
    ** First check to see that the transaction journal exists and
    ** create it if it does not.
    */
    assert( pPager->state!=PAGER_UNLOCK );
    rc = sqlite3PagerBegin(pPg, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    assert( pPager->state>=PAGER_RESERVED );
    if( !pPager->journalOpen && pPager->useJournal ){
      rc = pager_open_journal(pPager);
      if( rc!=SQLITE_OK ) return rc;
    }
    assert( pPager->journalOpen || !pPager->useJournal );
    pPager->dirtyCache = 1;

    /* The transaction journal now exists and we have a RESERVED or an
    ** EXCLUSIVE lock on the main database file.  Write the current page to
    ** the transaction journal if it is not there already.
    */
    if( !pPg->inJournal && (pPager->useJournal || MEMDB) ){
      if( (int)pPg->pgno <= pPager->origDbSize ){
        /* The page existed when the transaction started, so its
        ** original content has to be preserved. */
        int szPg;
        if( MEMDB ){
          /* In-memory database: keep a heap copy of the original page
          ** in the page's history record instead of writing a file.
          ** If the allocation fails, pOrig stays NULL and no copy is
          ** made; this routine does not report that condition itself. */
          PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
          PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
          assert( pHist->pOrig==0 );
          pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
          if( pHist->pOrig ){
            memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
          }
        }else{
          u32 cksum, saved;
          char *pData2, *pEnd;
          /* We should never write to the journal file the page that
          ** contains the database locks.  The following assert verifies
          ** that we do not. */
          assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );

          /* The journal record is written with a single call and has
          ** the layout: 4-byte page number, page image, 4-byte checksum
          ** (pageSize+8 bytes in total).  To build it in place, the 4
          ** bytes just before the page image receive the page number
          ** and the 4 bytes just after it are temporarily overwritten
          ** with the checksum; the latter are saved first and restored
          ** once the write has been issued.
          ** NOTE(review): assumes the 4 bytes preceding the buffer
          ** returned by CODEC2() are scratch space -- confirm against
          ** the PgHdr layout. */
          pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
          cksum = pager_cksum(pPager, (u8*)pData2);
          pEnd = pData2 + pPager->pageSize;
          pData2 -= 4;
          saved = *(u32*)pEnd;
          put32bits(pEnd, cksum);
          szPg = pPager->pageSize+8;
          put32bits(pData2, pPg->pgno);
          rc = sqlite3OsWrite(pPager->jfd, pData2, szPg);
          IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno,
                   pPager->journalOff, szPg));
          PAGER_INCR(sqlite3_pager_writej_count);
          /* journalOff is advanced and the saved bytes are restored
          ** before rc is examined, i.e. also when the write failed. */
          pPager->journalOff += szPg;
          PAGERTRACE4("JOURNAL %d page %d needSync=%d\n",
                  PAGERID(pPager), pPg->pgno, pPg->needSync);
          *(u32*)pEnd = saved;

          /* An error has occurred writing to the journal file.  The
          ** transaction will be rolled back by the layer above.
          */
          if( rc!=SQLITE_OK ){
            return rc;
          }

          /* Record that the page is now in the journal (one bit per
          ** page number in aInJournal) and, if a statement is active,
          ** in the statement bitmap as well. */
          pPager->nRec++;
          assert( pPager->aInJournal!=0 );
          pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
          pPg->needSync = !pPager->noSync;
          if( pPager->stmtInUse ){
            pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
          }
        }
      }else{
        /* The page lies beyond the original end of the database, so
        ** there is no original content to journal. */
        pPg->needSync = !pPager->journalStarted && !pPager->noSync;
        PAGERTRACE4("APPEND %d page %d needSync=%d\n",
                PAGERID(pPager), pPg->pgno, pPg->needSync);
      }
      if( pPg->needSync ){
        pPager->needSync = 1;
      }
      pPg->inJournal = 1;
    }

    /* If the statement journal is open and the page is not in it,
    ** then write the current page to the statement journal.  Note that
    ** the statement journal format differs from the standard journal format
    ** in that it omits the checksums and the header.
    */
    if( pPager->stmtInUse
     && !pageInStatement(pPg)
     && (int)pPg->pgno<=pPager->stmtSize
    ){
      assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
      if( MEMDB ){
        /* In-memory database: keep a heap copy of the page as it is at
        ** this point and link the page into the statement list. */
        PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
        assert( pHist->pStmt==0 );
        pHist->pStmt = sqliteMallocRaw( pPager->pageSize );
        if( pHist->pStmt ){
          memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
        }
        PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
        page_add_to_stmt_list(pPg);
      }else{
        /* Statement-journal record: 4-byte page number followed by the
        ** page image (pageSize+4 bytes), again built in place. */
        char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7)-4;
        put32bits(pData2, pPg->pgno);
        rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize+4);
        PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        pPager->stmtNRec++;
        assert( pPager->aInStmt!=0 );
        pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
      }
    }
  }

  /* Update the database size and return.
  **
  ** If the new size equals PENDING_BYTE/pageSize it is bumped by one.
  ** NOTE(review): presumably this steps over the page that holds the
  ** lock bytes -- confirm against PAGER_MJ_PGNO().
  */
  assert( pPager->state>=PAGER_SHARED );
  if( pPager->dbSize<(int)pPg->pgno ){
    pPager->dbSize = pPg->pgno;
    if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
      pPager->dbSize++;
    }
  }
  return rc;
}
3517 :
3518 : /*
3519 : ** This function is used to mark a data-page as writable. It uses
3520 : ** pager_write() to open a journal file (if it is not already open)
3521 : ** and write the page *pData to the journal.
3522 : **
3523 : ** The difference between this function and pager_write() is that this
3524 : ** function also deals with the special case where 2 or more pages
3525 : ** fit on a single disk sector. In this case all co-resident pages
3526 : ** must have been written to the journal file before returning.
3527 : */
3528 767 : int sqlite3PagerWrite(DbPage *pDbPage){
3529 767 : int rc = SQLITE_OK;
3530 :
3531 767 : PgHdr *pPg = pDbPage;
3532 767 : Pager *pPager = pPg->pPager;
3533 767 : Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
3534 :
3535 767 : if( !MEMDB && nPagePerSector>1 ){
3536 : Pgno nPageCount; /* Total number of pages in database file */
3537 : Pgno pg1; /* First page of the sector pPg is located on. */
3538 : int nPage; /* Number of pages starting at pg1 to journal */
3539 : int ii;
3540 :
3541 : /* Set the doNotSync flag to 1. This is because we cannot allow a journal
3542 : ** header to be written between the pages journaled by this function.
3543 : */
3544 : assert( pPager->doNotSync==0 );
3545 0 : pPager->doNotSync = 1;
3546 :
3547 : /* This trick assumes that both the page-size and sector-size are
3548 : ** an integer power of 2. It sets variable pg1 to the identifier
3549 : ** of the first page of the sector pPg is located on.
3550 : */
3551 0 : pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
3552 :
3553 0 : nPageCount = sqlite3PagerPagecount(pPager);
3554 0 : if( pPg->pgno>nPageCount ){
3555 0 : nPage = (pPg->pgno - pg1)+1;
3556 0 : }else if( (pg1+nPagePerSector-1)>nPageCount ){
3557 0 : nPage = nPageCount+1-pg1;
3558 : }else{
3559 0 : nPage = nPagePerSector;
3560 : }
3561 : assert(nPage>0);
3562 : assert(pg1<=pPg->pgno);
3563 : assert((pg1+nPage)>pPg->pgno);
3564 :
3565 0 : for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
3566 0 : Pgno pg = pg1+ii;
3567 0 : if( !pPager->aInJournal || pg==pPg->pgno ||
3568 : pg>pPager->origDbSize || !(pPager->aInJournal[pg/8]&(1<<(pg&7)))
3569 : ) {
3570 0 : if( pg!=PAGER_MJ_PGNO(pPager) ){
3571 : PgHdr *pPage;
3572 0 : rc = sqlite3PagerGet(pPager, pg, &pPage);
3573 0 : if( rc==SQLITE_OK ){
3574 0 : rc = pager_write(pPage);
3575 0 : sqlite3PagerUnref(pPage);
3576 : }
3577 : }
3578 : }
3579 : }
3580 :
3581 : assert( pPager->doNotSync==1 );
3582 0 : pPager->doNotSync = 0;
3583 : }else{
3584 767 : rc = pager_write(pDbPage);
3585 : }
3586 767 : return rc;
3587 : }
3588 :
3589 : /*
3590 : ** Return TRUE if the page given in the argument was previously passed
3591 : ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok
3592 : ** to change the content of the page.
3593 : */
3594 : #ifndef NDEBUG
3595 : int sqlite3PagerIswriteable(DbPage *pPg){
3596 : return pPg->dirty;
3597 : }
3598 : #endif
3599 :
3600 : #ifndef SQLITE_OMIT_VACUUM
3601 : /*
3602 : ** Replace the content of a single page with the information in the third
3603 : ** argument.
3604 : */
3605 0 : int sqlite3PagerOverwrite(Pager *pPager, Pgno pgno, void *pData){
3606 : PgHdr *pPg;
3607 : int rc;
3608 :
3609 0 : rc = sqlite3PagerGet(pPager, pgno, &pPg);
3610 0 : if( rc==SQLITE_OK ){
3611 0 : rc = sqlite3PagerWrite(pPg);
3612 0 : if( rc==SQLITE_OK ){
3613 0 : memcpy(sqlite3PagerGetData(pPg), pData, pPager->pageSize);
3614 : }
3615 0 : sqlite3PagerUnref(pPg);
3616 : }
3617 0 : return rc;
3618 : }
3619 : #endif
3620 :
3621 : /*
3622 : ** A call to this routine tells the pager that it is not necessary to
3623 : ** write the information on page pPg back to the disk, even though
3624 : ** that page might be marked as dirty.
3625 : **
3626 : ** The overlying software layer calls this routine when all of the data
3627 : ** on the given page is unused. The pager marks the page as clean so
3628 : ** that it does not get written to disk.
3629 : **
3630 : ** Tests show that this optimization, together with the
3631 : ** sqlite3PagerDontRollback() below, more than double the speed
3632 : ** of large INSERT operations and quadruple the speed of large DELETEs.
3633 : **
3634 : ** When this routine is called, set the alwaysRollback flag to true.
3635 : ** Subsequent calls to sqlite3PagerDontRollback() for the same page
3636 : ** will thereafter be ignored. This is necessary to avoid a problem
3637 : ** where a page with data is added to the freelist during one part of
3638 : ** a transaction then removed from the freelist during a later part
3639 : ** of the same transaction and reused for some other purpose. When it
3640 : ** is first added to the freelist, this routine is called. When reused,
3641 : ** the sqlite3PagerDontRollback() routine is called. But because the
3642 : ** page contains critical data, we still need to be sure it gets
3643 : ** rolled back in spite of the sqlite3PagerDontRollback() call.
3644 : */
3645 0 : void sqlite3PagerDontWrite(DbPage *pDbPage){
3646 0 : PgHdr *pPg = pDbPage;
3647 0 : Pager *pPager = pPg->pPager;
3648 :
3649 0 : if( MEMDB ) return;
3650 0 : pPg->alwaysRollback = 1;
3651 0 : if( pPg->dirty && !pPager->stmtInUse ){
3652 : assert( pPager->state>=PAGER_SHARED );
3653 0 : if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
3654 : /* If this pages is the last page in the file and the file has grown
3655 : ** during the current transaction, then do NOT mark the page as clean.
3656 : ** When the database file grows, we must make sure that the last page
3657 : ** gets written at least once so that the disk file will be the correct
3658 : ** size. If you do not write this page and the size of the file
3659 : ** on the disk ends up being too small, that can lead to database
3660 : ** corruption during the next transaction.
3661 : */
3662 : }else{
3663 : PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager));
3664 : IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
3665 0 : makeClean(pPg);
3666 : #ifdef SQLITE_CHECK_PAGES
3667 : pPg->pageHash = pager_pagehash(pPg);
3668 : #endif
3669 : }
3670 : }
3671 : }
3672 :
3673 : /*
3674 : ** A call to this routine tells the pager that if a rollback occurs,
3675 : ** it is not necessary to restore the data on the given page. This
3676 : ** means that the pager does not have to record the given page in the
3677 : ** rollback journal.
3678 : **
3679 : ** If we have not yet actually read the content of this page (if
3680 : ** the PgHdr.needRead flag is set) then this routine acts as a promise
3681 : ** that we will never need to read the page content in the future.
3682 : ** so the needRead flag can be cleared at this point.
3683 : */
3684 0 : void sqlite3PagerDontRollback(DbPage *pPg){
3685 0 : Pager *pPager = pPg->pPager;
3686 :
3687 : assert( pPager->state>=PAGER_RESERVED );
3688 0 : if( pPager->journalOpen==0 ) return;
3689 0 : if( pPg->alwaysRollback || pPager->alwaysRollback || MEMDB ) return;
3690 0 : if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
3691 : assert( pPager->aInJournal!=0 );
3692 0 : pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
3693 0 : pPg->inJournal = 1;
3694 0 : pPg->needRead = 0;
3695 0 : if( pPager->stmtInUse ){
3696 0 : pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
3697 : }
3698 : PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
3699 : IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno))
3700 : }
3701 0 : if( pPager->stmtInUse
3702 : && !pageInStatement(pPg)
3703 : && (int)pPg->pgno<=pPager->stmtSize
3704 : ){
3705 : assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
3706 : assert( pPager->aInStmt!=0 );
3707 0 : pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
3708 : }
3709 : }
3710 :
3711 :
3712 : /*
3713 : ** This routine is called to increment the database file change-counter,
3714 : ** stored at byte 24 of the pager file.
3715 : */
3716 14 : static int pager_incr_changecounter(Pager *pPager){
3717 : PgHdr *pPgHdr;
3718 : u32 change_counter;
3719 : int rc;
3720 :
3721 14 : if( !pPager->changeCountDone ){
3722 : /* Open page 1 of the file for writing. */
3723 14 : rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
3724 14 : if( rc!=SQLITE_OK ) return rc;
3725 14 : rc = sqlite3PagerWrite(pPgHdr);
3726 14 : if( rc!=SQLITE_OK ) return rc;
3727 :
3728 : /* Read the current value at byte 24. */
3729 14 : change_counter = retrieve32bits(pPgHdr, 24);
3730 :
3731 : /* Increment the value just read and write it back to byte 24. */
3732 14 : change_counter++;
3733 14 : put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
3734 :
3735 : /* Release the page reference. */
3736 14 : sqlite3PagerUnref(pPgHdr);
3737 14 : pPager->changeCountDone = 1;
3738 : }
3739 14 : return SQLITE_OK;
3740 : }
3741 :
/*
** Sync the database file for the pager pPager. zMaster points to the name
** of a master journal file that should be written into the individual
** journal file. zMaster may be NULL, which is interpreted as no master
** journal (a single database transaction).
**
** This routine ensures that the journal is synced, all dirty pages written
** to the database file and the database file synced. The only thing that
** remains to commit the transaction is to delete the journal file (or
** master journal file if specified).
**
** Note that if zMaster==NULL, this does not overwrite a previous value
** passed to an sqlite3PagerCommitPhaseOne() call.
**
** If parameter nTrunc is non-zero, then the pager file is truncated to
** nTrunc pages (this is used by auto-vacuum databases).
**
** Parameters:
**   pPager   The pager whose transaction is being committed.
**   zMaster  Name of the master journal, or NULL.
**   nTrunc   New size of the database in pages, or 0 for no truncation.
**
** Return value:
**   SQLITE_OK on success, otherwise the error code of the first step
**   that failed.  Every failure jumps to sync_exit, which returns rc
**   without any further processing in this routine.
**
** The routine does nothing (and returns SQLITE_OK) if the pager is
** already in state PAGER_SYNCED or if the cache is not dirty.  For an
** in-memory database the only action is the optional truncation.
*/
int sqlite3PagerCommitPhaseOne(Pager *pPager, const char *zMaster, Pgno nTrunc){
  int rc = SQLITE_OK;

  PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n",
      pPager->zFilename, zMaster, nTrunc);

  /* If this is an in-memory db, or no pages have been written to, or this
  ** function has already been called, it is a no-op.
  */
  if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
    PgHdr *pPg;
    assert( pPager->journalOpen );

    /* If a master journal file name has already been written to the
    ** journal file, then no sync is required. This happens when it is
    ** written, then the process fails to upgrade from a RESERVED to an
    ** EXCLUSIVE lock. The next time the process tries to commit the
    ** transaction the m-j name will have already been written.
    */
    if( !pPager->setMaster ){
      /* Step 1: bump the change counter on page 1. */
      rc = pager_incr_changecounter(pPager);
      if( rc!=SQLITE_OK ) goto sync_exit;
#ifndef SQLITE_OMIT_AUTOVACUUM
      if( nTrunc!=0 ){
        /* If this transaction has made the database smaller, then all pages
        ** being discarded by the truncation must be written to the journal
        ** file.
        */
        Pgno i;
        int iSkip = PAGER_MJ_PGNO(pPager);
        for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
          /* Journal every page past the new end that is not yet flagged
          ** in the journal bitmap; page iSkip is never journaled. */
          if( !(pPager->aInJournal[i/8] & (1<<(i&7))) && i!=iSkip ){
            rc = sqlite3PagerGet(pPager, i, &pPg);
            if( rc!=SQLITE_OK ) goto sync_exit;
            rc = sqlite3PagerWrite(pPg);
            /* The reference is released before rc is examined, so the
            ** error path does not leak it. */
            sqlite3PagerUnref(pPg);
            if( rc!=SQLITE_OK ) goto sync_exit;
          }
        }
      }
#endif
      /* Step 2: record the master journal name, then sync the journal. */
      rc = writeMasterJournal(pPager, zMaster);
      if( rc!=SQLITE_OK ) goto sync_exit;
      rc = syncJournal(pPager);
      if( rc!=SQLITE_OK ) goto sync_exit;
    }

#ifndef SQLITE_OMIT_AUTOVACUUM
    /* Step 3: shrink the database file if requested. */
    if( nTrunc!=0 ){
      rc = sqlite3PagerTruncate(pPager, nTrunc);
      if( rc!=SQLITE_OK ) goto sync_exit;
    }
#endif

    /* Write all dirty pages to the database file */
    pPg = pager_get_all_dirty_pages(pPager);
    rc = pager_write_pagelist(pPg);
    if( rc!=SQLITE_OK ) goto sync_exit;
    pPager->pDirty = 0;

    /* Sync the database file. */
    if( !pPager->noSync ){
      rc = sqlite3OsSync(pPager->fd, 0);
    }
    IOTRACE(("DBSYNC %p\n", pPager))

    /* NOTE(review): the state changes to PAGER_SYNCED even when the
    ** sqlite3OsSync() call above returned an error; that error is still
    ** handed back to the caller through rc. */
    pPager->state = PAGER_SYNCED;
  }else if( MEMDB && nTrunc!=0 ){
    rc = sqlite3PagerTruncate(pPager, nTrunc);
  }

sync_exit:
  return rc;
}
3833 :
3834 :
3835 : /*
3836 : ** Commit all changes to the database and release the write lock.
3837 : **
3838 : ** If the commit fails for any reason, a rollback attempt is made
3839 : ** and an error code is returned. If the commit worked, SQLITE_OK
3840 : ** is returned.
3841 : */
3842 209 : int sqlite3PagerCommitPhaseTwo(Pager *pPager){
3843 : int rc;
3844 : PgHdr *pPg;
3845 :
3846 209 : if( pPager->errCode ){
3847 0 : return pPager->errCode;
3848 : }
3849 209 : if( pPager->state<PAGER_RESERVED ){
3850 0 : return SQLITE_ERROR;
3851 : }
3852 : PAGERTRACE2("COMMIT %d\n", PAGERID(pPager));
3853 209 : if( MEMDB ){
3854 195 : pPg = pager_get_all_dirty_pages(pPager);
3855 800 : while( pPg ){
3856 410 : PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
3857 410 : clearHistory(pHist);
3858 410 : pPg->dirty = 0;
3859 410 : pPg->inJournal = 0;
3860 410 : pHist->inStmt = 0;
3861 410 : pPg->needSync = 0;
3862 410 : pHist->pPrevStmt = pHist->pNextStmt = 0;
3863 410 : pPg = pPg->pDirty;
3864 : }
3865 195 : pPager->pDirty = 0;
3866 : #ifndef NDEBUG
3867 : for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
3868 : PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
3869 : assert( !pPg->alwaysRollback );
3870 : assert( !pHist->pOrig );
3871 : assert( !pHist->pStmt );
3872 : }
3873 : #endif
3874 195 : pPager->pStmt = 0;
3875 195 : pPager->state = PAGER_SHARED;
3876 195 : return SQLITE_OK;
3877 : }
3878 : assert( pPager->journalOpen || !pPager->dirtyCache );
3879 : assert( pPager->state==PAGER_SYNCED || !pPager->dirtyCache );
3880 14 : rc = pager_end_transaction(pPager);
3881 14 : return pager_error(pPager, rc);
3882 : }
3883 :
3884 : /*
3885 : ** Rollback all changes. The database falls back to PAGER_SHARED mode.
3886 : ** All in-memory cache pages revert to their original data contents.
3887 : ** The journal is deleted.
3888 : **
3889 : ** This routine cannot fail unless some other process is not following
3890 : ** the correct locking protocol or unless some other
3891 : ** process is writing trash into the journal file (SQLITE_CORRUPT) or
3892 : ** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
3893 : ** codes are returned for all these occasions. Otherwise,
3894 : ** SQLITE_OK is returned.
3895 : */
3896 2 : int sqlite3PagerRollback(Pager *pPager){
3897 : int rc;
3898 : PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
3899 2 : if( MEMDB ){
3900 : PgHdr *p;
3901 4 : for(p=pPager->pAll; p; p=p->pNextAll){
3902 : PgHistory *pHist;
3903 : assert( !p->alwaysRollback );
3904 3 : if( !p->dirty ){
3905 : assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig );
3906 : assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt );
3907 1 : continue;
3908 : }
3909 :
3910 2 : pHist = PGHDR_TO_HIST(p, pPager);
3911 2 : if( pHist->pOrig ){
3912 2 : memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
3913 : PAGERTRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager));
3914 : }else{
3915 : PAGERTRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager));
3916 : }
3917 2 : clearHistory(pHist);
3918 2 : p->dirty = 0;
3919 2 : p->inJournal = 0;
3920 2 : pHist->inStmt = 0;
3921 2 : pHist->pPrevStmt = pHist->pNextStmt = 0;
3922 2 : if( pPager->xReiniter ){
3923 2 : pPager->xReiniter(p, pPager->pageSize);
3924 : }
3925 : }
3926 1 : pPager->pDirty = 0;
3927 1 : pPager->pStmt = 0;
3928 1 : pPager->dbSize = pPager->origDbSize;
3929 1 : pager_truncate_cache(pPager);
3930 1 : pPager->stmtInUse = 0;
3931 1 : pPager->state = PAGER_SHARED;
3932 1 : return SQLITE_OK;
3933 : }
3934 :
3935 1 : if( !pPager->dirtyCache || !pPager->journalOpen ){
3936 0 : rc = pager_end_transaction(pPager);
3937 0 : return rc;
3938 : }
3939 :
3940 1 : if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
3941 0 : if( pPager->state>=PAGER_EXCLUSIVE ){
3942 0 : pager_playback(pPager, 0);
3943 : }
3944 0 : return pPager->errCode;
3945 : }
3946 1 : if( pPager->state==PAGER_RESERVED ){
3947 : int rc2;
3948 1 : rc = pager_playback(pPager, 0);
3949 1 : rc2 = pager_end_transaction(pPager);
3950 1 : if( rc==SQLITE_OK ){
3951 1 : rc = rc2;
3952 : }
3953 : }else{
3954 0 : rc = pager_playback(pPager, 0);
3955 : }
3956 : /* pager_reset(pPager); */
3957 1 : pPager->dbSize = -1;
3958 :
3959 : /* If an error occurs during a ROLLBACK, we can no longer trust the pager
3960 : ** cache. So call pager_error() on the way out to make any error
3961 : ** persistent.
3962 : */
3963 1 : return pager_error(pPager, rc);
3964 : }
3965 :
3966 : /*
3967 : ** Return TRUE if the database file is opened read-only. Return FALSE
3968 : ** if the database is (in theory) writable.
3969 : */
3970 130 : int sqlite3PagerIsreadonly(Pager *pPager){
3971 130 : return pPager->readOnly;
3972 : }
3973 :
3974 : /*
3975 : ** Return the number of references to the pager.
3976 : */
3977 754 : int sqlite3PagerRefcount(Pager *pPager){
3978 754 : return pPager->nRef;
3979 : }
3980 :
#ifdef SQLITE_TEST
/*
** Testing and analysis only.
**
** Fill a function-local static array of 11 integers with a snapshot of
** pager counters and return a pointer to it.  Because the array is
** static, each call overwrites the values returned by the previous one.
*/
int *sqlite3PagerStats(Pager *pPager){
  static int aStat[11];
  aStat[0]  = pPager->nRef;      /* reference count */
  aStat[1]  = pPager->nPage;     /* Pager.nPage */
  aStat[2]  = pPager->mxPage;    /* Pager.mxPage */
  aStat[3]  = pPager->dbSize;    /* database size */
  aStat[4]  = pPager->state;     /* lock/transaction state */
  aStat[5]  = pPager->errCode;   /* pending error code */
  aStat[6]  = pPager->nHit;      /* Pager.nHit */
  aStat[7]  = pPager->nMiss;     /* Pager.nMiss */
  aStat[8]  = 0;                 /* Used to be pPager->nOvfl */
  aStat[9]  = pPager->nRead;     /* Pager.nRead */
  aStat[10] = pPager->nWrite;    /* Pager.nWrite */
  return aStat;
}
#endif
4001 :
4002 : /*
4003 : ** Set the statement rollback point.
4004 : **
4005 : ** This routine should be called with the transaction journal already
4006 : ** open. A new statement journal is created that can be used to rollback
4007 : ** changes of a single SQL command within a larger transaction.
4008 : */
4009 0 : int sqlite3PagerStmtBegin(Pager *pPager){
4010 : int rc;
4011 : assert( !pPager->stmtInUse );
4012 : assert( pPager->state>=PAGER_SHARED );
4013 : assert( pPager->dbSize>=0 );
4014 : PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
4015 0 : if( MEMDB ){
4016 0 : pPager->stmtInUse = 1;
4017 0 : pPager->stmtSize = pPager->dbSize;
4018 0 : return SQLITE_OK;
4019 : }
4020 0 : if( !pPager->journalOpen ){
4021 0 : pPager->stmtAutoopen = 1;
4022 0 : return SQLITE_OK;
4023 : }
4024 : assert( pPager->journalOpen );
4025 0 : pPager->aInStmt = sqliteMalloc( pPager->dbSize/8 + 1 );
4026 0 : if( pPager->aInStmt==0 ){
4027 : /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
4028 0 : return SQLITE_NOMEM;
4029 : }
4030 : #ifndef NDEBUG
4031 : rc = sqlite3OsFileSize(pPager->jfd, &pPager->stmtJSize);
4032 : if( rc ) goto stmt_begin_failed;
4033 : assert( pPager->stmtJSize == pPager->journalOff );
4034 : #endif
4035 0 : pPager->stmtJSize = pPager->journalOff;
4036 0 : pPager->stmtSize = pPager->dbSize;
4037 0 : pPager->stmtHdrOff = 0;
4038 0 : pPager->stmtCksum = pPager->cksumInit;
4039 0 : if( !pPager->stmtOpen ){
4040 0 : rc = sqlite3PagerOpentemp(&pPager->stfd);
4041 0 : if( rc ) goto stmt_begin_failed;
4042 0 : pPager->stmtOpen = 1;
4043 0 : pPager->stmtNRec = 0;
4044 : }
4045 0 : pPager->stmtInUse = 1;
4046 0 : return SQLITE_OK;
4047 :
4048 0 : stmt_begin_failed:
4049 0 : if( pPager->aInStmt ){
4050 0 : sqliteFree(pPager->aInStmt);
4051 0 : pPager->aInStmt = 0;
4052 : }
4053 0 : return rc;
4054 : }
4055 :
4056 : /*
4057 : ** Commit a statement.
4058 : */
4059 15 : int sqlite3PagerStmtCommit(Pager *pPager){
4060 15 : if( pPager->stmtInUse ){
4061 : PgHdr *pPg, *pNext;
4062 : PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
4063 0 : if( !MEMDB ){
4064 0 : sqlite3OsSeek(pPager->stfd, 0);
4065 : /* sqlite3OsTruncate(pPager->stfd, 0); */
4066 0 : sqliteFree( pPager->aInStmt );
4067 0 : pPager->aInStmt = 0;
4068 : }else{
4069 0 : for(pPg=pPager->pStmt; pPg; pPg=pNext){
4070 0 : PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
4071 0 : pNext = pHist->pNextStmt;
4072 : assert( pHist->inStmt );
4073 0 : pHist->inStmt = 0;
4074 0 : pHist->pPrevStmt = pHist->pNextStmt = 0;
4075 0 : sqliteFree(pHist->pStmt);
4076 0 : pHist->pStmt = 0;
4077 : }
4078 : }
4079 0 : pPager->stmtNRec = 0;
4080 0 : pPager->stmtInUse = 0;
4081 0 : pPager->pStmt = 0;
4082 : }
4083 15 : pPager->stmtAutoopen = 0;
4084 15 : return SQLITE_OK;
4085 : }
4086 :
4087 : /*
4088 : ** Rollback a statement.
4089 : */
4090 0 : int sqlite3PagerStmtRollback(Pager *pPager){
4091 : int rc;
4092 0 : if( pPager->stmtInUse ){
4093 : PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
4094 0 : if( MEMDB ){
4095 : PgHdr *pPg;
4096 : PgHistory *pHist;
4097 0 : for(pPg=pPager->pStmt; pPg; pPg=pHist->pNextStmt){
4098 0 : pHist = PGHDR_TO_HIST(pPg, pPager);
4099 0 : if( pHist->pStmt ){
4100 0 : memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
4101 0 : sqliteFree(pHist->pStmt);
4102 0 : pHist->pStmt = 0;
4103 : }
4104 : }
4105 0 : pPager->dbSize = pPager->stmtSize;
4106 0 : pager_truncate_cache(pPager);
4107 0 : rc = SQLITE_OK;
4108 : }else{
4109 0 : rc = pager_stmt_playback(pPager);
4110 : }
4111 0 : sqlite3PagerStmtCommit(pPager);
4112 : }else{
4113 0 : rc = SQLITE_OK;
4114 : }
4115 0 : pPager->stmtAutoopen = 0;
4116 0 : return rc;
4117 : }
4118 :
4119 : /*
4120 : ** Return the full pathname of the database file.
4121 : */
4122 562 : const char *sqlite3PagerFilename(Pager *pPager){
4123 562 : return pPager->zFilename;
4124 : }
4125 :
4126 : /*
4127 : ** Return the directory of the database file.
4128 : */
4129 0 : const char *sqlite3PagerDirname(Pager *pPager){
4130 0 : return pPager->zDirectory;
4131 : }
4132 :
4133 : /*
4134 : ** Return the full pathname of the journal file.
4135 : */
4136 0 : const char *sqlite3PagerJournalname(Pager *pPager){
4137 0 : return pPager->zJournal;
4138 : }
4139 :
4140 : /*
4141 : ** Return true if fsync() calls are disabled for this pager. Return FALSE
4142 : ** if fsync()s are executed normally.
4143 : */
4144 0 : int sqlite3PagerNosync(Pager *pPager){
4145 0 : return pPager->noSync;
4146 : }
4147 :
#ifdef SQLITE_HAS_CODEC
/*
** Install a codec on this pager.
**
** xCodec is the codec callback and pCodecArg the opaque argument stored
** alongside it; both are simply recorded in the Pager structure.
*/
void sqlite3PagerSetCodec(
  Pager *pPager,
  void *(*xCodec)(void*,void*,Pgno,int),
  void *pCodecArg
){
  pPager->pCodecArg = pCodecArg;
  pPager->xCodec = xCodec;
}
#endif
4161 :
#ifndef SQLITE_OMIT_AUTOVACUUM
/*
** Move the page pPg to location pgno in the file.
**
** There must be no references to the current page pgno. If current page
** pgno is not already in the rollback journal, it is not written there
** by this routine. The same applies to the page pPg refers to on entry to
** this routine.
**
** References to the page referred to by pPg remain valid. Updating any
** meta-data associated with page pPg (i.e. data stored in the nExtra bytes
** allocated along with the page) is the responsibility of the caller.
**
** A transaction must be active when this routine is called. It used to be
** required that a statement transaction was not active, but this restriction
** has been removed (CREATE INDEX needs to move a page when a statement
** transaction is active).
**
** Parameters:
**   pPager  The pager that owns the page.
**   pPg     The page to move; it must be referenced (nRef>0).
**   pgno    The new page number; must be non-zero.
**
** Return value:
**   SQLITE_OK on success.  If the page has to be re-fetched at its old
**   location (see the end of the routine) and sqlite3PagerGet() fails,
**   that error code is returned; pPg has already been renumbered and
**   re-hashed at that point.
*/
int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno){
  PgHdr *pPgOld;             /* Cached page currently numbered pgno, if any */
  int h;                     /* Hash bucket for the new page number */
  Pgno needSyncPgno = 0;     /* Old page number if pPg->needSync was set */

  assert( pPg->nRef>0 );

  PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n",
      PAGERID(pPager), pPg->pgno, pPg->needSync, pgno);
  IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))

  /* Remember the old page number when the journal still has to be
  ** synced before that location may be overwritten in the database. */
  if( pPg->needSync ){
    needSyncPgno = pPg->pgno;
    assert( pPg->inJournal );
    assert( pPg->dirty );
    assert( pPager->needSync );
  }

  /* Unlink pPg from its hash-chain */
  unlinkHashChain(pPager, pPg);

  /* If the cache contains a page with page-number pgno, remove it
  ** from its hash chain. Also, if the PgHdr.needSync was set for
  ** page pgno before the 'move' operation, it needs to be retained
  ** for the page moved there.
  */
  pPgOld = pager_lookup(pPager, pgno);
  if( pPgOld ){
    assert( pPgOld->nRef==0 );
    unlinkHashChain(pPager, pPgOld);
    makeClean(pPgOld);
    if( pPgOld->needSync ){
      assert( pPgOld->inJournal );
      pPg->inJournal = 1;
      pPg->needSync = 1;
      assert( pPager->needSync );
    }
  }

  /* Change the page number for pPg and insert it into the new hash-chain.
  ** The page becomes the new head of bucket h.
  ** NOTE(review): the mask below presumes nHash is a power of two --
  ** confirm where nHash is set. */
  assert( pgno!=0 );
  pPg->pgno = pgno;
  h = pgno & (pPager->nHash-1);
  if( pPager->aHash[h] ){
    assert( pPager->aHash[h]->pPrevHash==0 );
    pPager->aHash[h]->pPrevHash = pPg;
  }
  pPg->pNextHash = pPager->aHash[h];
  pPager->aHash[h] = pPg;
  pPg->pPrevHash = 0;

  /* The page now has to be written at its new location. */
  makeDirty(pPg);
  pPager->dirtyCache = 1;

  if( needSyncPgno ){
    /* If needSyncPgno is non-zero, then the journal file needs to be
    ** sync()ed before any data is written to database file page needSyncPgno.
    ** Currently, no such page exists in the page-cache and the
    ** Pager.aInJournal bit has been set. This needs to be remedied by loading
    ** the page into the pager-cache and setting the PgHdr.needSync flag.
    **
    ** The sqlite3PagerGet() call may cause the journal to sync. So make
    ** sure the Pager.needSync flag is set too.
    */
    int rc;
    PgHdr *pPgHdr;
    assert( pPager->needSync );
    rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
    if( rc!=SQLITE_OK ) return rc;
    pPager->needSync = 1;
    pPgHdr->needSync = 1;
    pPgHdr->inJournal = 1;
    makeDirty(pPgHdr);
    sqlite3PagerUnref(pPgHdr);
  }

  return SQLITE_OK;
}
#endif
4259 :
4260 : /*
4261 : ** Return a pointer to the data for the specified page.
4262 : */
4263 2100 : void *sqlite3PagerGetData(DbPage *pPg){
4264 2100 : return PGHDR_TO_DATA(pPg);
4265 : }
4266 :
4267 : /*
4268 : ** Return a pointer to the Pager.nExtra bytes of "extra" space
4269 : ** allocated along with the specified page.
4270 : */
4271 2900 : void *sqlite3PagerGetExtra(DbPage *pPg){
4272 2900 : Pager *pPager = pPg->pPager;
4273 2900 : return (pPager?PGHDR_TO_EXTRA(pPg, pPager):0);
4274 : }
4275 :
4276 : /*
4277 : ** Get/set the locking-mode for this pager. Parameter eMode must be one
4278 : ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or
4279 : ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
4280 : ** the locking-mode is set to the value specified.
4281 : **
4282 : ** The returned value is either PAGER_LOCKINGMODE_NORMAL or
4283 : ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
4284 : ** locking-mode.
4285 : */
4286 0 : int sqlite3PagerLockingMode(Pager *pPager, int eMode){
4287 : assert( eMode==PAGER_LOCKINGMODE_QUERY
4288 : || eMode==PAGER_LOCKINGMODE_NORMAL
4289 : || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
4290 : assert( PAGER_LOCKINGMODE_QUERY<0 );
4291 : assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
4292 0 : if( eMode>=0 && !pPager->tempFile ){
4293 0 : pPager->exclusiveMode = eMode;
4294 : }
4295 0 : return (int)pPager->exclusiveMode;
4296 : }
4297 :
4298 : #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
4299 : /*
4300 : ** Return the current state of the file lock for the given pager.
4301 : ** The return value is one of NO_LOCK, SHARED_LOCK, RESERVED_LOCK,
4302 : ** PENDING_LOCK, or EXCLUSIVE_LOCK.
4303 : */
4304 : int sqlite3PagerLockstate(Pager *pPager){
4305 : return sqlite3OsLockState(pPager->fd);
4306 : }
4307 : #endif
4308 :
4309 : #ifdef SQLITE_DEBUG
4310 : /*
4311 : ** Print a listing of all referenced pages and their ref count.
4312 : */
4313 : void sqlite3PagerRefdump(Pager *pPager){
4314 : PgHdr *pPg;
4315 : for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
4316 : if( pPg->nRef<=0 ) continue;
4317 : sqlite3DebugPrintf("PAGE %3d addr=%p nRef=%d\n",
4318 : pPg->pgno, PGHDR_TO_DATA(pPg), pPg->nRef);
4319 : }
4320 : }
4321 : #endif
4322 :
4323 : #endif /* SQLITE_OMIT_DISKIO */
|