1 : /*
2 : ** 2004 May 22
3 : **
4 : ** The author disclaims copyright to this source code. In place of
5 : ** a legal notice, here is a blessing:
6 : **
7 : ** May you do good and not evil.
8 : ** May you find forgiveness for yourself and forgive others.
9 : ** May you share freely, never taking more than you give.
10 : **
11 : ******************************************************************************
12 : **
13 : ** This file contains code that is specific to Unix systems.
14 : */
15 : #include "sqliteInt.h"
16 : #include "os.h"
17 : #if OS_UNIX /* This file is used on unix only */
18 :
19 : /* #define SQLITE_ENABLE_LOCKING_STYLE 0 */
20 :
/*
** Ask the C library for large-file (>2GB) support on Posix systems
** that provide it.  Where the OS has no large file support these
** macros are expected to be no-ops.
**
** Build with -DSQLITE_DISABLE_LFS to leave large file support off.
** That is needed when compiling on a recent machine (ex: RedHat 7.2)
** a binary that must also run on an older one (ex: RedHat 6.0):
** without the switch LFS is enabled at compile time, but the RedHat 6.0
** kernel has no LFS and the resulting code will not work there.  For
** maximum binary portability, omit LFS.
**
** _FILE_OFFSET_BITS is left alone if it has already been defined.
**
** NOTE(review): feature-test macros normally have to be defined before
** the first system header is seen, but "sqliteInt.h" and "os.h" are
** included above this point.  Confirm those headers either set these
** macros themselves or do not pull in system headers.
*/
#if !defined(SQLITE_DISABLE_LFS)
# define _LARGE_FILE       1
# if !defined(_FILE_OFFSET_BITS)
#   define _FILE_OFFSET_BITS 64
# endif
# define _LARGEFILE_SOURCE 1
#endif
41 :
42 : /*
43 : ** standard include files.
44 : */
45 : #include <sys/types.h>
46 : #include <sys/stat.h>
47 : #include <fcntl.h>
48 : #include <unistd.h>
49 : #include <time.h>
50 : #include <sys/time.h>
51 : #include <errno.h>
52 : #ifdef SQLITE_ENABLE_LOCKING_STYLE
53 : #include <sys/ioctl.h>
54 : #include <sys/param.h>
55 : #include <sys/mount.h>
56 : #endif /* SQLITE_ENABLE_LOCKING_STYLE */
57 :
/*
** THREADSAFE defaults to 1 when the build does not set it.  Whenever
** it is non-zero, pull in the pthreads header and define the
** SQLITE_UNIX_THREADS macro that the rest of this file tests.
*/
#if !defined(THREADSAFE)
# define THREADSAFE 1
#endif
#if THREADSAFE
# include <pthread.h>
# define SQLITE_UNIX_THREADS 1
#endif
69 :
/*
** Permission bits passed to open() when a new file is created.  The
** build may override the value by pre-defining the macro.
*/
#if !defined(SQLITE_DEFAULT_FILE_PERMISSIONS)
# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
#endif
76 :
77 :
78 :
79 : /*
80 : ** The unixFile structure is subclass of OsFile specific for the unix
81 : ** protability layer.
82 : */
83 : typedef struct unixFile unixFile;
84 : struct unixFile {
85 : IoMethod const *pMethod; /* Always the first entry */
86 : struct openCnt *pOpen; /* Info about all open fd's on this inode */
87 : struct lockInfo *pLock; /* Info about locks on this inode */
88 : #ifdef SQLITE_ENABLE_LOCKING_STYLE
89 : void *lockingContext; /* Locking style specific state */
90 : #endif /* SQLITE_ENABLE_LOCKING_STYLE */
91 : int h; /* The file descriptor */
92 : unsigned char locktype; /* The type of lock held on this fd */
93 : unsigned char isOpen; /* True if needs to be closed */
94 : unsigned char fullSync; /* Use F_FULLSYNC if available */
95 : int dirfd; /* File descriptor for the directory */
96 : i64 offset; /* Seek offset */
97 : #ifdef SQLITE_UNIX_THREADS
98 : pthread_t tid; /* The thread that "owns" this OsFile */
99 : #endif
100 : };
101 :
/*
** Hook that lets the test harness replace some OS-layer functions.
** It is used to simulate OS crashes in order to verify that commits
** stay atomic even when the OS crashes.
**
** With SQLITE_CRASH_TEST defined, CRASH_TEST_OVERRIDE(X,A,B,C) makes
** the enclosing function return X(A,B,C) whenever the global
** sqlite3CrashTestEnable is non-zero.  Without it the macro expands to
** nothing.
*/
#if defined(SQLITE_CRASH_TEST)
  extern int sqlite3CrashTestEnable;
  extern int sqlite3CrashOpenReadWrite(const char*, OsFile**, int*);
  extern int sqlite3CrashOpenExclusive(const char*, OsFile**, int);
  extern int sqlite3CrashOpenReadOnly(const char*, OsFile**, int);
# define CRASH_TEST_OVERRIDE(X,A,B,C) \
    if(sqlite3CrashTestEnable){ return X(A,B,C); }
#else
# define CRASH_TEST_OVERRIDE(X,A,B,C)  /* no-op */
#endif
117 :
118 :
119 : /*
120 : ** Include code that is common to all os_*.c files
121 : */
122 : #include "os_common.h"
123 :
124 : /*
125 : ** Do not include any of the File I/O interface procedures if the
126 : ** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
127 : ** will be in-memory only)
128 : */
129 : #ifndef SQLITE_OMIT_DISKIO
130 :
131 :
/*
** Fallbacks for open() flags that are missing from some systems.  A
** flag the platform does not provide becomes 0, so OR-ing it into the
** open() flags has no effect.  O_LARGEFILE is also forced to 0 when
** large file support has been turned off with SQLITE_DISABLE_LFS.
*/
#if defined(SQLITE_DISABLE_LFS) || !defined(O_LARGEFILE)
# undef O_LARGEFILE
# define O_LARGEFILE 0
#endif
#if !defined(O_NOFOLLOW)
# define O_NOFOLLOW 0
#endif
#if !defined(O_BINARY)
# define O_BINARY 0
#endif

/*
** The DJGPP compiler environment looks mostly like Unix but has no
** fcntl() system call.  Replace fcntl() with something that always
** succeeds.  The consequence is that no locking happens under DJGPP.
** But it's DOS - what did you expect?
*/
#if defined(__DJGPP__)
# define fcntl(A,B,C) 0
#endif
158 :
/*
** The threadid macro resolves to the id of the calling thread, or to 0
** in a build without thread support.  Used for testing and debugging
** only.
*/
#if !defined(SQLITE_UNIX_THREADS)
# define threadid 0
#else
# define threadid pthread_self()
#endif

/*
** Set or check the OsFile.tid field.  The field is set when an OsFile
** is first opened, and later uses of the OsFile verify that the same
** thread is operating on it.  Some operating systems do not let one
** thread override locks taken by another, which means sqlite3*
** database handles cannot be moved between threads there.  This logic
** makes sure a user does not try to do that by mistake.
**
** SET_THREADID(X) records the calling thread in X->tid.
** CHECK_THREADID(X) is true when threads cannot override each others
** locks and X->tid is not the calling thread.  In a build without
** thread support SET_THREADID() is empty and CHECK_THREADID() is 0.
**
** Version 3.3.1 (2006-01-15):  OsFiles can be moved from one thread to
** another as long as we are running on a system that supports threads
** overriding each others locks (which is now the most common behavior)
** or if no locks are held.  But the OsFile.pLock field needs to be
** recomputed because its key includes the thread-id.  See the
** transferOwnership() function below for additional information.
*/
#if !defined(SQLITE_UNIX_THREADS)
# define SET_THREADID(X)
# define CHECK_THREADID(X) 0
#else
# define SET_THREADID(X)   (X)->tid = pthread_self()
# define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
                            !pthread_equal((X)->tid, pthread_self()))
#endif
193 :
194 : /*
195 : ** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
196 : ** section 6.5.2.2 lines 483 through 490 specify that when a process
197 : ** sets or clears a lock, that operation overrides any prior locks set
198 : ** by the same process. It does not explicitly say so, but this implies
199 : ** that it overrides locks set by the same process using a different
200 : ** file descriptor. Consider this test case:
201 : **
202 : ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
203 : ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
204 : **
205 : ** Suppose ./file1 and ./file2 are really the same file (because
206 : ** one is a hard or symbolic link to the other) then if you set
207 : ** an exclusive lock on fd1, then try to get an exclusive lock
208 : ** on fd2, it works. I would have expected the second lock to
209 : ** fail since there was already a lock on the file due to fd1.
210 : ** But not so. Since both locks came from the same process, the
211 : ** second overrides the first, even though they were on different
212 : ** file descriptors opened on different file names.
213 : **
214 : ** Bummer. If you ask me, this is broken. Badly broken. It means
215 : ** that we cannot use POSIX locks to synchronize file access among
216 : ** competing threads of the same process. POSIX locks will work fine
217 : ** to synchronize access for threads in separate processes, but not
218 : ** threads within the same process.
219 : **
220 : ** To work around the problem, SQLite has to manage file locks internally
221 : ** on its own. Whenever a new database is opened, we have to find the
222 : ** specific inode of the database file (the inode is determined by the
223 : ** st_dev and st_ino fields of the stat structure that fstat() fills in)
224 : ** and check for locks already existing on that inode. When locks are
225 : ** created or removed, we have to look at our own internal record of the
226 : ** locks to see if another thread has previously set a lock on that same
227 : ** inode.
228 : **
229 : ** The OsFile structure for POSIX is no longer just an integer file
230 : ** descriptor. It is now a structure that holds the integer file
231 : ** descriptor and a pointer to a structure that describes the internal
232 : ** locks on the corresponding inode. There is one locking structure
233 : ** per inode, so if the same inode is opened twice, both OsFile structures
234 : ** point to the same locking structure. The locking structure keeps
235 : ** a reference count (so we will know when to delete it) and a "cnt"
236 : ** field that tells us its internal lock status. cnt==0 means the
237 : ** file is unlocked. cnt==-1 means the file has an exclusive lock.
238 : ** cnt>0 means there are cnt shared locks on the file.
239 : **
240 : ** Any attempt to lock or unlock a file first checks the locking
241 : ** structure. The fcntl() system call is only invoked to set a
242 : ** POSIX lock if the internal lock structure transitions between
243 : ** a locked and an unlocked state.
244 : **
245 : ** 2004-Jan-11:
246 : ** More recent discoveries about POSIX advisory locks. (The more
247 : ** I discover, the more I realize that POSIX advisory locks are
248 : ** an abomination.)
249 : **
250 : ** If you close a file descriptor that points to a file that has locks,
251 : ** all locks on that file that are owned by the current process are
252 : ** released. To work around this problem, each OsFile structure contains
253 : ** a pointer to an openCnt structure. There is one openCnt structure
254 : ** per open inode, which means that multiple OsFiles can point to a single
255 : ** openCnt. When an attempt is made to close an OsFile, if there are
256 : ** other OsFiles open on the same inode that are holding locks, the call
257 : ** to close() the file descriptor is deferred until all of the locks clear.
258 : ** The openCnt structure keeps a list of file descriptors that need to
259 : ** be closed and that list is walked (and cleared) when the last lock
260 : ** clears.
261 : **
262 : ** First, under Linux threads, because each thread has a separate
263 : ** process ID, lock operations in one thread do not override locks
264 : ** to the same file in other threads. Linux threads behave like
265 : ** separate processes in this respect. But, if you close a file
266 : ** descriptor in linux threads, all locks are cleared, even locks
267 : ** on other threads and even though the other threads have different
268 : ** process IDs. Linux threads is inconsistent in this respect.
269 : ** (I'm beginning to think that linux threads is an abomination too.)
270 : ** The consequence of this all is that the hash table for the lockInfo
271 : ** structure has to include the process id as part of its key because
272 : ** locks in different threads are treated as distinct. But the
273 : ** openCnt structure should not include the process id in its
274 : ** key because close() clears lock on all threads, not just the current
275 : ** thread. Were it not for this goofiness in linux threads, we could
276 : ** combine the lockInfo and openCnt structures into a single structure.
277 : **
278 : ** 2004-Jun-28:
279 : ** On some versions of linux, threads can override each others locks.
280 : ** On others not. Sometimes you can change the behavior on the same
281 : ** system by setting the LD_ASSUME_KERNEL environment variable. The
282 : ** POSIX standard is silent as to which behavior is correct, as far
283 : ** as I can tell, so other versions of unix might show the same
284 : ** inconsistency. There is little doubt in my mind that posix
285 : ** advisory locks and linux threads are profoundly broken.
286 : **
287 : ** To work around the inconsistencies, we have to test at runtime
288 : ** whether or not threads can override each others locks. This test
289 : ** is run once, the first time any lock is attempted. A static
290 : ** variable is set to record the results of this test for future
291 : ** use.
292 : */
293 :
/*
** Lookup key used to find the lockInfo structure that belongs to an
** inode.
**
** When threads cannot override each others locks, lockKey.tid holds
** the thread ID.  When they can, tid is always zero.  The tid field
** does not exist at all in a build without threading support.
*/
struct lockKey {
  dev_t dev;       /* Device number */
  ino_t ino;       /* Inode number */
#if defined(SQLITE_UNIX_THREADS)
  pthread_t tid;   /* Thread ID or zero if threads can override each other */
#endif
};
310 :
311 : /*
312 : ** An instance of the following structure is allocated for each open
313 : ** inode on each thread with a different process ID. (Threads have
314 : ** different process IDs on linux, but not on most other unixes.)
315 : **
316 : ** A single inode can have multiple file descriptors, so each OsFile
317 : ** structure contains a pointer to an instance of this object and this
318 : ** object keeps a count of the number of OsFiles pointing to it.
319 : */
320 : struct lockInfo {
321 : struct lockKey key; /* The lookup key */
322 : int cnt; /* Number of SHARED locks held */
323 : int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
324 : int nRef; /* Number of pointers to this structure */
325 : };
326 :
/*
** Lookup key used to find the openCnt structure that belongs to an
** inode.  It matches lockKey except that no thread ID is included.
*/
struct openKey {
  dev_t dev;   /* Device number */
  ino_t ino;   /* Inode number */
};
336 :
337 : /*
338 : ** An instance of the following structure is allocated for each open
339 : ** inode. This structure keeps track of the number of locks on that
340 : ** inode. If a close is attempted against an inode that is holding
341 : ** locks, the close is deferred until all locks clear by adding the
342 : ** file descriptor to be closed to the pending list.
343 : */
344 : struct openCnt {
345 : struct openKey key; /* The lookup key */
346 : int nRef; /* Number of pointers to this structure */
347 : int nLock; /* Number of outstanding locks */
348 : int nPending; /* Number of pending close() operations */
349 : int *aPending; /* Malloced space holding fd's awaiting a close() */
350 : };
351 :
352 : /*
353 : ** These hash tables map inodes and file descriptors (really, lockKey and
354 : ** openKey structures) into lockInfo and openCnt structures. Access to
355 : ** these hash tables must be protected by a mutex.
356 : */
357 : static Hash lockHash = {SQLITE_HASH_BINARY, 0, 0, 0,
358 : sqlite3ThreadSafeMalloc, sqlite3ThreadSafeFree, 0, 0};
359 : static Hash openHash = {SQLITE_HASH_BINARY, 0, 0, 0,
360 : sqlite3ThreadSafeMalloc, sqlite3ThreadSafeFree, 0, 0};
361 :
362 : #ifdef SQLITE_ENABLE_LOCKING_STYLE
/*
** Each locking style corresponds to the file locking capabilities that
** a particular kind of file system offers.
**
**   POSIX        full support for shared and exclusive byte-range locks
**   AFP          exclusive byte-range locks only
**   FLOCK        a single file-global exclusive lock only
**   DOTLOCK      not a true locking style: a special file with the same
**                name as the database file plus a '.lock' extension is
**                used; suitable for file systems that offer no
**                reliable file locking
**   NO           no locking is attempted; currently only used for
**                read-only file systems
**   UNSUPPORTED  no locking is attempted; only used for file systems
**                that are known to be unsupported
*/
typedef enum {
  posixLockingStyle = 0,       /* standard posix-advisory locks */
  afpLockingStyle,             /* use afp locks */
  flockLockingStyle,           /* use flock() */
  dotlockLockingStyle,         /* use <file>.lock files */
  noLockingStyle,              /* useful for read-only file system */
  unsupportedLockingStyle      /* indicates unsupported file system */
} sqlite3LockingStyle;
386 : #endif /* SQLITE_ENABLE_LOCKING_STYLE */
387 :
388 : #ifdef SQLITE_UNIX_THREADS
/*
** Records whether or not threads can override each others locks.
**
**    0:  No.  Threads cannot override each others locks.
**    1:  Yes.  Threads can override each others locks.
**   -1:  We don't know yet.
**
** On some systems the answer is known at compile-time, and there the
** SQLITE_THREAD_OVERRIDE_LOCK macro is set accordingly.  On all other
** systems SQLITE_THREAD_OVERRIDE_LOCK is undefined on entry, defaults
** to -1 here, and the answer has to be found at runtime.
**
** The variable normally has file scope only.  During testing it is
** made global so that test code can change its value and verify that
** the right thing happens in either case.
*/
#if !defined(SQLITE_THREAD_OVERRIDE_LOCK)
# define SQLITE_THREAD_OVERRIDE_LOCK -1
#endif
#ifndef SQLITE_TEST
static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
#else
int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
#endif
415 :
/*
** Argument block that testThreadLockingBehavior() hands to each of its
** test threads.  The thread applies "lock" to "fd" and stores the
** outcome in "result".
*/
struct threadTestData {
  int fd;              /* File to be locked */
  struct flock lock;   /* The locking operation */
  int result;          /* Result of the locking operation */
};
425 :
#ifdef SQLITE_LOCK_TRACE
/*
** Translate the l_type field of a struct flock into a printable name.
** An unrecognized value trips an assert() in builds that keep asserts
** and yields "UNKNOWN" otherwise, so the returned pointer is always
** safe to hand to a printf-style routine.
*/
static const char *flockTypeName(int lockType){
  if( lockType==F_RDLCK ) return "RDLCK";
  if( lockType==F_WRLCK ) return "WRLCK";
  if( lockType==F_UNLCK ) return "UNLCK";
  assert( 0 );
  return "UNKNOWN";
}

/*
** Print out information about all locking operations.
**
** This routine is used for troubleshooting locks on multithreaded
** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
** command-line option on the compiler.  This code is normally
** turned off.
**
** The routine is a drop-in wrapper around fcntl(fd, op, p): it performs
** the call, prints a trace line and returns the fcntl() result.  For
** F_GETLK and F_SETLK the trace shows the requested lock; any other op
** is passed through with a shorter trace line.  When an F_SETLK request
** for a read or write lock fails, a follow-up F_GETLK is issued and its
** answer is printed as the failure reason.  errno as left by the traced
** call is restored before returning.
*/
static int lockTrace(int fd, int op, struct flock *p){
  const char *zOpName;
  const char *zType;
  int s;
  int savedErrno;

  if( op==F_GETLK ){
    zOpName = "GETLK";
  }else if( op==F_SETLK ){
    zOpName = "SETLK";
  }else{
    s = fcntl(fd, op, p);
    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
    return s;
  }

  /* Capture the name of the requested lock type before the call:
  ** F_GETLK may overwrite p->l_type. */
  zType = flockTypeName(p->l_type);
  assert( p->l_whence==SEEK_SET );
  s = fcntl(fd, op, p);
  savedErrno = errno;
  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
     (int)p->l_pid, s);
  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
    struct flock l2;
    l2 = *p;
    fcntl(fd, F_GETLK, &l2);
    zType = flockTypeName(l2.l_type);
    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
  }
  errno = savedErrno;
  return s;
}
/* From here on every fcntl() call in this file goes through the tracer */
#define fcntl lockTrace
#endif /* SQLITE_LOCK_TRACE */
484 :
485 : /*
486 : ** The testThreadLockingBehavior() routine launches two separate
487 : ** threads on this routine. This routine attempts to lock a file
488 : ** descriptor then returns. The success or failure of that attempt
489 : ** allows the testThreadLockingBehavior() procedure to determine
490 : ** whether or not threads can override each others locks.
491 : */
492 : static void *threadLockingTest(void *pArg){
493 : struct threadTestData *pData = (struct threadTestData*)pArg;
494 : pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
495 : return pArg;
496 : }
497 :
498 : /*
499 : ** This procedure attempts to determine whether or not threads
500 : ** can override each others locks then sets the
501 : ** threadsOverrideEachOthersLocks variable appropriately.
502 : */
503 : static void testThreadLockingBehavior(int fd_orig){
504 : int fd;
505 : struct threadTestData d[2];
506 : pthread_t t[2];
507 :
508 : fd = dup(fd_orig);
509 : if( fd<0 ) return;
510 : memset(d, 0, sizeof(d));
511 : d[0].fd = fd;
512 : d[0].lock.l_type = F_RDLCK;
513 : d[0].lock.l_len = 1;
514 : d[0].lock.l_start = 0;
515 : d[0].lock.l_whence = SEEK_SET;
516 : d[1] = d[0];
517 : d[1].lock.l_type = F_WRLCK;
518 : pthread_create(&t[0], 0, threadLockingTest, &d[0]);
519 : pthread_create(&t[1], 0, threadLockingTest, &d[1]);
520 : pthread_join(t[0], 0);
521 : pthread_join(t[1], 0);
522 : close(fd);
523 : threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
524 : }
525 : #endif /* SQLITE_UNIX_THREADS */
526 :
527 : /*
528 : ** Release a lockInfo structure previously allocated by findLockInfo().
529 : */
530 28 : static void releaseLockInfo(struct lockInfo *pLock){
531 : assert( sqlite3OsInMutex(1) );
532 28 : if (pLock == NULL)
533 0 : return;
534 28 : pLock->nRef--;
535 28 : if( pLock->nRef==0 ){
536 28 : sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
537 28 : sqlite3ThreadSafeFree(pLock);
538 : }
539 : }
540 :
541 : /*
542 : ** Release a openCnt structure previously allocated by findLockInfo().
543 : */
544 28 : static void releaseOpenCnt(struct openCnt *pOpen){
545 : assert( sqlite3OsInMutex(1) );
546 28 : if (pOpen == NULL)
547 0 : return;
548 28 : pOpen->nRef--;
549 28 : if( pOpen->nRef==0 ){
550 28 : sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
551 28 : free(pOpen->aPending);
552 28 : sqlite3ThreadSafeFree(pOpen);
553 : }
554 : }
555 :
556 : #ifdef SQLITE_ENABLE_LOCKING_STYLE
557 : /*
558 : ** Tests a byte-range locking query to see if byte range locks are
559 : ** supported, if not we fall back to dotlockLockingStyle.
560 : */
561 : static sqlite3LockingStyle sqlite3TestLockingStyle(const char *filePath,
562 : int fd) {
563 : /* test byte-range lock using fcntl */
564 : struct flock lockInfo;
565 :
566 : lockInfo.l_len = 1;
567 : lockInfo.l_start = 0;
568 : lockInfo.l_whence = SEEK_SET;
569 : lockInfo.l_type = F_RDLCK;
570 :
571 : if (fcntl(fd, F_GETLK, &lockInfo) != -1) {
572 : return posixLockingStyle;
573 : }
574 :
575 : /* testing for flock can give false positives. So if if the above test
576 : ** fails, then we fall back to using dot-lock style locking.
577 : */
578 : return dotlockLockingStyle;
579 : }
580 :
581 : /*
582 : ** Examines the f_fstypename entry in the statfs structure as returned by
583 : ** stat() for the file system hosting the database file, assigns the
584 : ** appropriate locking style based on it's value. These values and
585 : ** assignments are based on Darwin/OSX behavior and have not been tested on
586 : ** other systems.
587 : */
588 : static sqlite3LockingStyle sqlite3DetectLockingStyle(const char *filePath,
589 : int fd) {
590 :
591 : #ifdef SQLITE_FIXED_LOCKING_STYLE
592 : return (sqlite3LockingStyle)SQLITE_FIXED_LOCKING_STYLE;
593 : #else
594 : struct statfs fsInfo;
595 :
596 : if (statfs(filePath, &fsInfo) == -1)
597 : return sqlite3TestLockingStyle(filePath, fd);
598 :
599 : if (fsInfo.f_flags & MNT_RDONLY)
600 : return noLockingStyle;
601 :
602 : if( (!strcmp(fsInfo.f_fstypename, "hfs")) ||
603 : (!strcmp(fsInfo.f_fstypename, "ufs")) )
604 : return posixLockingStyle;
605 :
606 : if(!strcmp(fsInfo.f_fstypename, "afpfs"))
607 : return afpLockingStyle;
608 :
609 : if(!strcmp(fsInfo.f_fstypename, "nfs"))
610 : return sqlite3TestLockingStyle(filePath, fd);
611 :
612 : if(!strcmp(fsInfo.f_fstypename, "smbfs"))
613 : return flockLockingStyle;
614 :
615 : if(!strcmp(fsInfo.f_fstypename, "msdos"))
616 : return dotlockLockingStyle;
617 :
618 : if(!strcmp(fsInfo.f_fstypename, "webdav"))
619 : return unsupportedLockingStyle;
620 :
621 : return sqlite3TestLockingStyle(filePath, fd);
622 : #endif // SQLITE_FIXED_LOCKING_STYLE
623 : }
624 :
625 : #endif /* SQLITE_ENABLE_LOCKING_STYLE */
626 :
627 : /*
628 : ** Given a file descriptor, locate lockInfo and openCnt structures that
629 : ** describes that file descriptor. Create new ones if necessary. The
630 : ** return values might be uninitialized if an error occurs.
631 : **
632 : ** Return the number of errors.
633 : */
634 : static int findLockInfo(
635 : int fd, /* The file descriptor used in the key */
636 : struct lockInfo **ppLock, /* Return the lockInfo structure here */
637 : struct openCnt **ppOpen /* Return the openCnt structure here */
638 28 : ){
639 : int rc;
640 : struct lockKey key1;
641 : struct openKey key2;
642 : struct stat statbuf;
643 : struct lockInfo *pLock;
644 : struct openCnt *pOpen;
645 28 : rc = fstat(fd, &statbuf);
646 28 : if( rc!=0 ) return 1;
647 :
648 : assert( sqlite3OsInMutex(1) );
649 28 : memset(&key1, 0, sizeof(key1));
650 28 : key1.dev = statbuf.st_dev;
651 28 : key1.ino = statbuf.st_ino;
652 : #ifdef SQLITE_UNIX_THREADS
653 : if( threadsOverrideEachOthersLocks<0 ){
654 : testThreadLockingBehavior(fd);
655 : }
656 : key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
657 : #endif
658 28 : memset(&key2, 0, sizeof(key2));
659 28 : key2.dev = statbuf.st_dev;
660 28 : key2.ino = statbuf.st_ino;
661 28 : pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
662 28 : if( pLock==0 ){
663 : struct lockInfo *pOld;
664 28 : pLock = sqlite3ThreadSafeMalloc( sizeof(*pLock) );
665 28 : if( pLock==0 ){
666 0 : rc = 1;
667 0 : goto exit_findlockinfo;
668 : }
669 28 : pLock->key = key1;
670 28 : pLock->nRef = 1;
671 28 : pLock->cnt = 0;
672 28 : pLock->locktype = 0;
673 28 : pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
674 28 : if( pOld!=0 ){
675 : assert( pOld==pLock );
676 0 : sqlite3ThreadSafeFree(pLock);
677 0 : rc = 1;
678 0 : goto exit_findlockinfo;
679 : }
680 : }else{
681 0 : pLock->nRef++;
682 : }
683 28 : *ppLock = pLock;
684 28 : if( ppOpen!=0 ){
685 28 : pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
686 28 : if( pOpen==0 ){
687 : struct openCnt *pOld;
688 28 : pOpen = sqlite3ThreadSafeMalloc( sizeof(*pOpen) );
689 28 : if( pOpen==0 ){
690 0 : releaseLockInfo(pLock);
691 0 : rc = 1;
692 0 : goto exit_findlockinfo;
693 : }
694 28 : pOpen->key = key2;
695 28 : pOpen->nRef = 1;
696 28 : pOpen->nLock = 0;
697 28 : pOpen->nPending = 0;
698 28 : pOpen->aPending = 0;
699 28 : pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
700 28 : if( pOld!=0 ){
701 : assert( pOld==pOpen );
702 0 : sqlite3ThreadSafeFree(pOpen);
703 0 : releaseLockInfo(pLock);
704 0 : rc = 1;
705 0 : goto exit_findlockinfo;
706 : }
707 : }else{
708 0 : pOpen->nRef++;
709 : }
710 28 : *ppOpen = pOpen;
711 : }
712 :
713 28 : exit_findlockinfo:
714 28 : return rc;
715 : }
716 :
#ifdef SQLITE_DEBUG
/*
** Helper for the trace output of debugging binaries.  Returns the
** string representation of the supplied integer lock-type, or "ERROR"
** if the value is not one of the known lock types.
*/
static const char *locktypeName(int locktype){
  if( locktype==NO_LOCK )        return "NONE";
  if( locktype==SHARED_LOCK )    return "SHARED";
  if( locktype==RESERVED_LOCK )  return "RESERVED";
  if( locktype==PENDING_LOCK )   return "PENDING";
  if( locktype==EXCLUSIVE_LOCK ) return "EXCLUSIVE";
  return "ERROR";
}
#endif
734 :
/*
** If we are currently in a different thread than the thread that the
** unixFile argument belongs to, then transfer ownership of the unixFile
** over to the current thread.
**
** A unixFile is only owned by a thread on systems where one thread is
** unable to override locks created by a different thread.  RedHat9 is
** an example of such a system.
**
** Ownership transfer is only allowed if the unixFile is currently
** unlocked.  If the unixFile is locked and owned by a different thread,
** SQLITE_MISUSE is returned.  SQLITE_OK is returned if everything works
** or if no transfer is needed.  If the lockInfo for the new thread
** cannot be obtained, the non-zero result of findLockInfo() is
** returned.
**
** The lockInfo for the new owner is obtained BEFORE the old one is
** released, and pFile is only modified once that has succeeded.  A
** failed transfer therefore leaves pFile exactly as it was (same tid,
** same still-referenced pLock) instead of leaving pFile->pLock pointing
** at a structure that has already been released.
*/
#ifdef SQLITE_UNIX_THREADS
static int transferOwnership(unixFile *pFile){
  int rc;
  pthread_t hSelf;
  struct lockInfo *pNew = 0;

  if( threadsOverrideEachOthersLocks ){
    /* Ownership transfers not needed on this system */
    return SQLITE_OK;
  }
  hSelf = pthread_self();
  if( pthread_equal(pFile->tid, hSelf) ){
    /* We are still in the same thread */
    OSTRACE1("No-transfer, same thread\n");
    return SQLITE_OK;
  }
  if( pFile->locktype!=NO_LOCK ){
    /* We cannot change ownership while we are holding a lock! */
    return SQLITE_MISUSE;
  }
  OSTRACE4("Transfer ownership of %d from %d to %d\n",
           pFile->h, pFile->tid, hSelf);
  if( pFile->pLock==NULL ){
    /* No lockInfo is attached, so only the owner needs updating */
    pFile->tid = hSelf;
    return SQLITE_OK;
  }

  /* findLockInfo() builds its key from pthread_self(), so it already
  ** looks up the entry of the new owner. */
  rc = findLockInfo(pFile->h, &pNew, 0);
  if( rc!=0 ){
    return rc;
  }
  releaseLockInfo(pFile->pLock);
  pFile->pLock = pNew;
  pFile->tid = hSelf;
  OSTRACE5("LOCK    %d is now %s(%s,%d)\n", pFile->h,
           locktypeName(pFile->locktype),
           locktypeName(pFile->pLock->locktype), pFile->pLock->cnt);
  return rc;
}
#else
  /* On single-threaded builds, ownership transfer is a no-op */
# define transferOwnership(X) SQLITE_OK
#endif
784 :
785 : /*
786 : ** Delete the named file
787 : */
788 15 : int sqlite3UnixDelete(const char *zFilename){
789 : SimulateIOError(return SQLITE_IOERR_DELETE);
790 15 : unlink(zFilename);
791 15 : return SQLITE_OK;
792 : }
793 :
/*
** Return TRUE (1) if the named file exists and FALSE (0) otherwise.
** Existence is decided by whether access() with a mode of 0 succeeds.
*/
int sqlite3UnixFileExists(const char *zFilename){
  int rc = access(zFilename, 0);
  return rc==0;
}
800 :
801 : /* Forward declaration */
802 : static int allocateUnixFile(
803 : int h, /* File descriptor of the open file */
804 : OsFile **pId, /* Write the real file descriptor here */
805 : const char *zFilename, /* Name of the file being opened */
806 : int delFlag /* If true, make sure the file deletes on close */
807 : );
808 :
809 : /*
810 : ** Attempt to open a file for both reading and writing. If that
811 : ** fails, try opening it read-only. If the file does not exist,
812 : ** try to create it.
813 : **
814 : ** On success, a handle for the open file is written to *id
815 : ** and *pReadonly is set to 0 if the file was opened for reading and
816 : ** writing or 1 if the file was opened read-only. The function returns
817 : ** SQLITE_OK.
818 : **
819 : ** On failure, the function returns SQLITE_CANTOPEN and leaves
820 : ** *id and *pReadonly unchanged.
821 : */
822 : int sqlite3UnixOpenReadWrite(
823 : const char *zFilename,
824 : OsFile **pId,
825 : int *pReadonly
826 13 : ){
827 : int h;
828 :
829 : CRASH_TEST_OVERRIDE(sqlite3CrashOpenReadWrite, zFilename, pId, pReadonly);
830 : assert( 0==*pId );
831 13 : h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY,
832 : SQLITE_DEFAULT_FILE_PERMISSIONS);
833 13 : if( h<0 ){
834 : #ifdef EISDIR
835 0 : if( errno==EISDIR ){
836 0 : return SQLITE_CANTOPEN;
837 : }
838 : #endif
839 0 : h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
840 0 : if( h<0 ){
841 0 : return SQLITE_CANTOPEN;
842 : }
843 0 : *pReadonly = 1;
844 : }else{
845 13 : *pReadonly = 0;
846 : }
847 13 : return allocateUnixFile(h, pId, zFilename, 0);
848 : }
849 :
850 :
851 : /*
852 : ** Attempt to open a new file for exclusive access by this process.
853 : ** The file will be opened for both reading and writing. To avoid
854 : ** a potential security problem, we do not allow the file to have
855 : ** previously existed. Nor do we allow the file to be a symbolic
856 : ** link.
857 : **
858 : ** If delFlag is true, then make arrangements to automatically delete
859 : ** the file when it is closed.
860 : **
861 : ** On success, write the file handle into *id and return SQLITE_OK.
862 : **
863 : ** On failure, return SQLITE_CANTOPEN.
864 : */
865 15 : int sqlite3UnixOpenExclusive(const char *zFilename, OsFile **pId, int delFlag){
866 : int h;
867 :
868 : CRASH_TEST_OVERRIDE(sqlite3CrashOpenExclusive, zFilename, pId, delFlag);
869 : assert( 0==*pId );
870 15 : h = open(zFilename,
871 : O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY,
872 : delFlag ? 0600 : SQLITE_DEFAULT_FILE_PERMISSIONS);
873 15 : if( h<0 ){
874 0 : return SQLITE_CANTOPEN;
875 : }
876 15 : return allocateUnixFile(h, pId, zFilename, delFlag);
877 : }
878 :
879 : /*
880 : ** Attempt to open a new file for read-only access.
881 : **
882 : ** On success, write the file handle into *id and return SQLITE_OK.
883 : **
884 : ** On failure, return SQLITE_CANTOPEN.
885 : */
886 0 : int sqlite3UnixOpenReadOnly(const char *zFilename, OsFile **pId){
887 : int h;
888 :
889 : CRASH_TEST_OVERRIDE(sqlite3CrashOpenReadOnly, zFilename, pId, 0);
890 : assert( 0==*pId );
891 0 : h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
892 0 : if( h<0 ){
893 0 : return SQLITE_CANTOPEN;
894 : }
895 0 : return allocateUnixFile(h, pId, zFilename, 0);
896 : }
897 :
898 : /*
899 : ** Attempt to open a file descriptor for the directory that contains a
900 : ** file. This file descriptor can be used to fsync() the directory
901 : ** in order to make sure the creation of a new file is actually written
902 : ** to disk.
903 : **
904 : ** This routine is only meaningful for Unix. It is a no-op under
905 : ** windows since windows does not support hard links.
906 : **
907 : ** If FULL_FSYNC is enabled, this function is not longer useful,
908 : ** a FULL_FSYNC sync applies to all pending disk operations.
909 : **
910 : ** On success, a handle for a previously open file at *id is
911 : ** updated with the new directory file descriptor and SQLITE_OK is
912 : ** returned.
913 : **
914 : ** On failure, the function returns SQLITE_CANTOPEN and leaves
915 : ** *id unchanged.
916 : */
917 : static int unixOpenDirectory(
918 : OsFile *id,
919 : const char *zDirname
920 15 : ){
921 15 : unixFile *pFile = (unixFile*)id;
922 : assert( pFile!=0 );
923 : SET_THREADID(pFile);
924 : assert( pFile->dirfd<0 );
925 15 : pFile->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0);
926 15 : if( pFile->dirfd<0 ){
927 0 : return SQLITE_CANTOPEN;
928 : }
929 : OSTRACE3("OPENDIR %-3d %s\n", pFile->dirfd, zDirname);
930 15 : return SQLITE_OK;
931 : }
932 :
933 : /*
934 : ** Create a temporary file name in zBuf. zBuf must be big enough to
935 : ** hold at least SQLITE_TEMPNAME_SIZE characters.
936 : */
937 0 : int sqlite3UnixTempFileName(char *zBuf){
938 : static const char *azDirs[] = {
939 : 0,
940 : "/var/tmp",
941 : "/usr/tmp",
942 : "/tmp",
943 : ".",
944 : };
945 : static const unsigned char zChars[] =
946 : "abcdefghijklmnopqrstuvwxyz"
947 : "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
948 : "0123456789";
949 : int i, j;
950 : struct stat buf;
951 0 : const char *zDir = ".";
952 0 : azDirs[0] = sqlite3_temp_directory;
953 0 : for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
954 0 : if( azDirs[i]==0 ) continue;
955 0 : if( stat(azDirs[i], &buf) ) continue;
956 0 : if( !S_ISDIR(buf.st_mode) ) continue;
957 0 : if( access(azDirs[i], 07) ) continue;
958 0 : zDir = azDirs[i];
959 0 : break;
960 : }
961 : do{
962 0 : sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
963 0 : j = strlen(zBuf);
964 0 : sqlite3Randomness(15, &zBuf[j]);
965 0 : for(i=0; i<15; i++, j++){
966 0 : zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
967 : }
968 0 : zBuf[j] = 0;
969 0 : }while( access(zBuf,0)==0 );
970 0 : return SQLITE_OK;
971 : }
972 :
/*
** Return 1 if zBuf names an existing directory for which access() grants
** read, write and search permission (mode 07).  Return 0 if zBuf is NULL,
** is an empty string, or fails any of those checks.
**
** When SQLITE_OMIT_PAGER_PRAGMAS is defined no checks are compiled in
** and the result is always 1.
*/
int sqlite3UnixIsDirWritable(char *zBuf){
#ifndef SQLITE_OMIT_PAGER_PRAGMAS
  struct stat sStat;
  if( zBuf==0 || zBuf[0]==0 ){
    return 0;
  }
  if( stat(zBuf, &sStat)!=0 || !S_ISDIR(sStat.st_mode) ){
    return 0;
  }
  if( access(zBuf, 07)!=0 ){
    return 0;
  }
#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
  return 1;
}
988 :
989 : /*
990 : ** Seek to the offset in id->offset then read cnt bytes into pBuf.
991 : ** Return the number of bytes actually read. Update the offset.
992 : */
993 48 : static int seekAndRead(unixFile *id, void *pBuf, int cnt){
994 : int got;
995 : i64 newOffset;
996 : TIMER_START;
997 : #if defined(USE_PREAD)
998 : got = pread(id->h, pBuf, cnt, id->offset);
999 : SimulateIOError( got = -1 );
1000 : #elif defined(USE_PREAD64)
1001 : got = pread64(id->h, pBuf, cnt, id->offset);
1002 : SimulateIOError( got = -1 );
1003 : #else
1004 48 : newOffset = lseek(id->h, id->offset, SEEK_SET);
1005 : SimulateIOError( newOffset-- );
1006 48 : if( newOffset!=id->offset ){
1007 0 : return -1;
1008 : }
1009 48 : got = read(id->h, pBuf, cnt);
1010 : #endif
1011 : TIMER_END;
1012 : OSTRACE5("READ %-3d %5d %7lld %d\n", id->h, got, id->offset, TIMER_ELAPSED);
1013 48 : if( got>0 ){
1014 42 : id->offset += got;
1015 : }
1016 48 : return got;
1017 : }
1018 :
1019 : /*
1020 : ** Read data from a file into a buffer. Return SQLITE_OK if all
1021 : ** bytes were read successfully and SQLITE_IOERR if anything goes
1022 : ** wrong.
1023 : */
1024 48 : static int unixRead(OsFile *id, void *pBuf, int amt){
1025 : int got;
1026 : assert( id );
1027 48 : got = seekAndRead((unixFile*)id, pBuf, amt);
1028 48 : if( got==amt ){
1029 42 : return SQLITE_OK;
1030 6 : }else if( got<0 ){
1031 0 : return SQLITE_IOERR_READ;
1032 : }else{
1033 6 : memset(&((char*)pBuf)[got], 0, amt-got);
1034 6 : return SQLITE_IOERR_SHORT_READ;
1035 : }
1036 : }
1037 :
1038 : /*
1039 : ** Seek to the offset in id->offset then read cnt bytes into pBuf.
1040 : ** Return the number of bytes actually read. Update the offset.
1041 : */
1042 98 : static int seekAndWrite(unixFile *id, const void *pBuf, int cnt){
1043 : int got;
1044 : i64 newOffset;
1045 : TIMER_START;
1046 : #if defined(USE_PREAD)
1047 : got = pwrite(id->h, pBuf, cnt, id->offset);
1048 : #elif defined(USE_PREAD64)
1049 : got = pwrite64(id->h, pBuf, cnt, id->offset);
1050 : #else
1051 98 : newOffset = lseek(id->h, id->offset, SEEK_SET);
1052 98 : if( newOffset!=id->offset ){
1053 0 : return -1;
1054 : }
1055 98 : got = write(id->h, pBuf, cnt);
1056 : #endif
1057 : TIMER_END;
1058 : OSTRACE5("WRITE %-3d %5d %7lld %d\n", id->h, got, id->offset, TIMER_ELAPSED);
1059 98 : if( got>0 ){
1060 98 : id->offset += got;
1061 : }
1062 98 : return got;
1063 : }
1064 :
1065 :
1066 : /*
1067 : ** Write data from a buffer into a file. Return SQLITE_OK on success
1068 : ** or some other error code on failure.
1069 : */
1070 98 : static int unixWrite(OsFile *id, const void *pBuf, int amt){
1071 98 : int wrote = 0;
1072 : assert( id );
1073 : assert( amt>0 );
1074 294 : while( amt>0 && (wrote = seekAndWrite((unixFile*)id, pBuf, amt))>0 ){
1075 98 : amt -= wrote;
1076 98 : pBuf = &((char*)pBuf)[wrote];
1077 : }
1078 : SimulateIOError(( wrote=(-1), amt=1 ));
1079 : SimulateDiskfullError(( wrote=0, amt=1 ));
1080 98 : if( amt>0 ){
1081 0 : if( wrote<0 ){
1082 0 : return SQLITE_IOERR_WRITE;
1083 : }else{
1084 0 : return SQLITE_FULL;
1085 : }
1086 : }
1087 98 : return SQLITE_OK;
1088 : }
1089 :
1090 : /*
1091 : ** Move the read/write pointer in a file.
1092 : */
1093 127 : static int unixSeek(OsFile *id, i64 offset){
1094 : assert( id );
1095 : #ifdef SQLITE_TEST
1096 : if( offset ) SimulateDiskfullError(return SQLITE_FULL);
1097 : #endif
1098 127 : ((unixFile*)id)->offset = offset;
1099 127 : return SQLITE_OK;
1100 : }
1101 :
#if defined(SQLITE_TEST)
/*
** Test instrumentation: running totals of the number of syncs and of
** full syncs requested through full_fsync().  The test suite reads these
** to verify that syncs and fullsyncs occur at the right times.
*/
int sqlite3_sync_count = 0;
int sqlite3_fullsync_count = 0;
#endif

/*
** fdatasync() is used only when the build declares it available by
** defining HAVE_FDATASYNC.  Otherwise every fdatasync() call in this
** file is redirected to fsync().
*/
#if !defined(HAVE_FDATASYNC)
# define fdatasync fsync
#endif

/*
** HAVE_FULLFSYNC is 1 when the F_FULLFSYNC fcntl() command is defined
** and 0 otherwise, so that it can be used in both #if and ordinary
** C expressions.  F_FULLFSYNC is currently only available on Mac OS X,
** but that could change.
*/
#if defined(F_FULLFSYNC)
# define HAVE_FULLFSYNC 1
#else
# define HAVE_FULLFSYNC 0
#endif
1129 :
1130 :
/*
** Flush file descriptor fd to disk and return the result of the
** underlying system call (0 on success).
**
** The fsync() system call does not work as advertised on many unix
** systems, so this wrapper chooses the strongest primitive available:
**
**   *  With HAVE_FULLFSYNC: if fullSync is true, fcntl(F_FULLFSYNC) is
**      tried first.  If it fails, or if fullSync is false, fsync() is
**      used.  A failed F_FULLFSYNC is taken to mean that the file system
**      does not support it.  It would be better to detect that once
**      rather than paying for the extra fcntl() on every sync.
**
**   *  Without HAVE_FULLFSYNC: fdatasync() is used when dataOnly is
**      true, fsync() otherwise.
**
** The SQLITE_NO_SYNC macro turns the whole routine into a no-op that
** returns SQLITE_OK.  That is useful for running the test suite quickly,
** but you are strongly advised *not* to deploy with SQLITE_NO_SYNC: an
** OS crash or power failure will then likely corrupt the database file.
**
** In SQLITE_TEST builds each call bumps sqlite3_sync_count, and also
** sqlite3_fullsync_count when fullSync is true, so that tests can verify
** this procedure gets called with the correct arguments.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

#ifdef SQLITE_TEST
  sqlite3_sync_count++;
  if( fullSync ) sqlite3_fullsync_count++;
#endif

#if defined(SQLITE_NO_SYNC)
  rc = SQLITE_OK;
#elif HAVE_FULLFSYNC
  /* rc==1 means "no full sync attempted"; any non-zero rc falls through
  ** to the ordinary fsync() below. */
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  if( rc ) rc = fsync(fd);
#else
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
#endif

  return rc;
}
1188 :
1189 : /*
1190 : ** Make sure all writes to a particular file are committed to disk.
1191 : **
1192 : ** If dataOnly==0 then both the file itself and its metadata (file
1193 : ** size, access time, etc) are synced. If dataOnly!=0 then only the
1194 : ** file data is synced.
1195 : **
1196 : ** Under Unix, also make sure that the directory entry for the file
1197 : ** has been created by fsync-ing the directory that contains the file.
1198 : ** If we do not do this and we encounter a power failure, the directory
1199 : ** entry for the journal might not exist after we reboot. The next
1200 : ** SQLite to access the file will not know that the journal exists (because
1201 : ** the directory entry for the journal was never created) and the transaction
1202 : ** will not roll back - possibly leading to database corruption.
1203 : */
1204 42 : static int unixSync(OsFile *id, int dataOnly){
1205 : int rc;
1206 42 : unixFile *pFile = (unixFile*)id;
1207 : assert( pFile );
1208 : OSTRACE2("SYNC %-3d\n", pFile->h);
1209 42 : rc = full_fsync(pFile->h, pFile->fullSync, dataOnly);
1210 : SimulateIOError( rc=1 );
1211 42 : if( rc ){
1212 0 : return SQLITE_IOERR_FSYNC;
1213 : }
1214 42 : if( pFile->dirfd>=0 ){
1215 : OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
1216 : HAVE_FULLFSYNC, pFile->fullSync);
1217 : #ifndef SQLITE_DISABLE_DIRSYNC
1218 : /* The directory sync is only attempted if full_fsync is
1219 : ** turned off or unavailable. If a full_fsync occurred above,
1220 : ** then the directory sync is superfluous.
1221 : */
1222 14 : if( (!HAVE_FULLFSYNC || !pFile->fullSync) && full_fsync(pFile->dirfd,0,0) ){
1223 : /*
1224 : ** We have received multiple reports of fsync() returning
1225 : ** errors when applied to directories on certain file systems.
1226 : ** A failed directory sync is not a big deal. So it seems
1227 : ** better to ignore the error. Ticket #1657
1228 : */
1229 : /* return SQLITE_IOERR; */
1230 : }
1231 : #endif
1232 14 : close(pFile->dirfd); /* Only need to sync once, so close the directory */
1233 14 : pFile->dirfd = -1; /* when we are done. */
1234 : }
1235 42 : return SQLITE_OK;
1236 : }
1237 :
1238 : /*
1239 : ** Sync the directory zDirname. This is a no-op on operating systems other
1240 : ** than UNIX.
1241 : **
1242 : ** This is used to make sure the master journal file has truely been deleted
1243 : ** before making changes to individual journals on a multi-database commit.
1244 : ** The F_FULLFSYNC option is not needed here.
1245 : */
1246 0 : int sqlite3UnixSyncDirectory(const char *zDirname){
1247 : #ifdef SQLITE_DISABLE_DIRSYNC
1248 : return SQLITE_OK;
1249 : #else
1250 : int fd;
1251 : int r;
1252 0 : fd = open(zDirname, O_RDONLY|O_BINARY, 0);
1253 : OSTRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
1254 0 : if( fd<0 ){
1255 0 : return SQLITE_CANTOPEN;
1256 : }
1257 0 : r = fsync(fd);
1258 0 : close(fd);
1259 : SimulateIOError( r=1 );
1260 0 : if( r ){
1261 0 : return SQLITE_IOERR_DIR_FSYNC;
1262 : }else{
1263 0 : return SQLITE_OK;
1264 : }
1265 : #endif
1266 : }
1267 :
1268 : /*
1269 : ** Truncate an open file to a specified size
1270 : */
1271 0 : static int unixTruncate(OsFile *id, i64 nByte){
1272 : int rc;
1273 : assert( id );
1274 0 : rc = ftruncate(((unixFile*)id)->h, nByte);
1275 : SimulateIOError( rc=1 );
1276 0 : if( rc ){
1277 0 : return SQLITE_IOERR_TRUNCATE;
1278 : }else{
1279 0 : return SQLITE_OK;
1280 : }
1281 : }
1282 :
1283 : /*
1284 : ** Determine the current size of a file in bytes
1285 : */
1286 23 : static int unixFileSize(OsFile *id, i64 *pSize){
1287 : int rc;
1288 : struct stat buf;
1289 : assert( id );
1290 23 : rc = fstat(((unixFile*)id)->h, &buf);
1291 : SimulateIOError( rc=1 );
1292 23 : if( rc!=0 ){
1293 0 : return SQLITE_IOERR_FSTAT;
1294 : }
1295 23 : *pSize = buf.st_size;
1296 23 : return SQLITE_OK;
1297 : }
1298 :
1299 : /*
1300 : ** This routine checks if there is a RESERVED lock held on the specified
1301 : ** file by this or any other process. If such a lock is held, return
1302 : ** non-zero. If the file is unlocked or holds only SHARED locks, then
1303 : ** return zero.
1304 : */
1305 0 : static int unixCheckReservedLock(OsFile *id){
1306 0 : int r = 0;
1307 0 : unixFile *pFile = (unixFile*)id;
1308 :
1309 : assert( pFile );
1310 0 : sqlite3OsEnterMutex(); /* Because pFile->pLock is shared across threads */
1311 :
1312 : /* Check if a thread in this process holds such a lock */
1313 0 : if( pFile->pLock->locktype>SHARED_LOCK ){
1314 0 : r = 1;
1315 : }
1316 :
1317 : /* Otherwise see if some other process holds it.
1318 : */
1319 0 : if( !r ){
1320 : struct flock lock;
1321 0 : lock.l_whence = SEEK_SET;
1322 0 : lock.l_start = RESERVED_BYTE;
1323 0 : lock.l_len = 1;
1324 0 : lock.l_type = F_WRLCK;
1325 0 : fcntl(pFile->h, F_GETLK, &lock);
1326 0 : if( lock.l_type!=F_UNLCK ){
1327 0 : r = 1;
1328 : }
1329 : }
1330 :
1331 0 : sqlite3OsLeaveMutex();
1332 : OSTRACE3("TEST WR-LOCK %d %d\n", pFile->h, r);
1333 :
1334 0 : return r;
1335 : }
1336 :
1337 : /*
1338 : ** Lock the file with the lock specified by parameter locktype - one
1339 : ** of the following:
1340 : **
1341 : ** (1) SHARED_LOCK
1342 : ** (2) RESERVED_LOCK
1343 : ** (3) PENDING_LOCK
1344 : ** (4) EXCLUSIVE_LOCK
1345 : **
1346 : ** Sometimes when requesting one lock state, additional lock states
1347 : ** are inserted in between. The locking might fail on one of the later
1348 : ** transitions leaving the lock state different from what it started but
1349 : ** still short of its goal. The following chart shows the allowed
1350 : ** transitions and the inserted intermediate states:
1351 : **
1352 : ** UNLOCKED -> SHARED
1353 : ** SHARED -> RESERVED
1354 : ** SHARED -> (PENDING) -> EXCLUSIVE
1355 : ** RESERVED -> (PENDING) -> EXCLUSIVE
1356 : ** PENDING -> EXCLUSIVE
1357 : **
1358 : ** This routine will only increase a lock. Use the sqlite3OsUnlock()
1359 : ** routine to lower a locking level.
1360 : */
1361 50 : static int unixLock(OsFile *id, int locktype){
1362 : /* The following describes the implementation of the various locks and
1363 : ** lock transitions in terms of the POSIX advisory shared and exclusive
1364 : ** lock primitives (called read-locks and write-locks below, to avoid
1365 : ** confusion with SQLite lock names). The algorithms are complicated
1366 : ** slightly in order to be compatible with windows systems simultaneously
1367 : ** accessing the same database file, in case that is ever required.
1368 : **
1369 : ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
1370 : ** byte', each single bytes at well known offsets, and the 'shared byte
1371 : ** range', a range of 510 bytes at a well known offset.
1372 : **
1373 : ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1374 : ** byte'. If this is successful, a random byte from the 'shared byte
1375 : ** range' is read-locked and the lock on the 'pending byte' released.
1376 : **
1377 : ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1378 : ** A RESERVED lock is implemented by grabbing a write-lock on the
1379 : ** 'reserved byte'.
1380 : **
1381 : ** A process may only obtain a PENDING lock after it has obtained a
1382 : ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1383 : ** on the 'pending byte'. This ensures that no new SHARED locks can be
1384 : ** obtained, but existing SHARED locks are allowed to persist. A process
1385 : ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1386 : ** This property is used by the algorithm for rolling back a journal file
1387 : ** after a crash.
1388 : **
1389 : ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1390 : ** implemented by obtaining a write-lock on the entire 'shared byte
1391 : ** range'. Since all other locks require a read-lock on one of the bytes
1392 : ** within this range, this ensures that no other locks are held on the
1393 : ** database.
1394 : **
1395 : ** The reason a single byte cannot be used instead of the 'shared byte
1396 : ** range' is that some versions of windows do not support read-locks. By
1397 : ** locking a random byte from a range, concurrent SHARED locks may exist
1398 : ** even if the locking primitive used is always a write-lock.
1399 : */
1400 50 : int rc = SQLITE_OK;
1401 50 : unixFile *pFile = (unixFile*)id;
1402 50 : struct lockInfo *pLock = pFile->pLock;
1403 : struct flock lock;
1404 : int s;
1405 :
1406 : assert( pFile );
1407 : OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
1408 : locktypeName(locktype), locktypeName(pFile->locktype),
1409 : locktypeName(pLock->locktype), pLock->cnt , getpid());
1410 :
1411 : /* If there is already a lock of this type or more restrictive on the
1412 : ** OsFile, do nothing. Don't use the end_lock: exit path, as
1413 : ** sqlite3OsEnterMutex() hasn't been called yet.
1414 : */
1415 50 : if( pFile->locktype>=locktype ){
1416 : OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
1417 : locktypeName(locktype));
1418 0 : return SQLITE_OK;
1419 : }
1420 :
1421 : /* Make sure the locking sequence is correct
1422 : */
1423 : assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1424 : assert( locktype!=PENDING_LOCK );
1425 : assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
1426 :
1427 : /* This mutex is needed because pFile->pLock is shared across threads
1428 : */
1429 50 : sqlite3OsEnterMutex();
1430 :
1431 : /* Make sure the current thread owns the pFile.
1432 : */
1433 50 : rc = transferOwnership(pFile);
1434 50 : if( rc!=SQLITE_OK ){
1435 0 : sqlite3OsLeaveMutex();
1436 0 : return rc;
1437 : }
1438 50 : pLock = pFile->pLock;
1439 :
1440 : /* If some thread using this PID has a lock via a different OsFile*
1441 : ** handle that precludes the requested lock, return BUSY.
1442 : */
1443 50 : if( (pFile->locktype!=pLock->locktype &&
1444 : (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
1445 : ){
1446 0 : rc = SQLITE_BUSY;
1447 0 : goto end_lock;
1448 : }
1449 :
1450 : /* If a SHARED lock is requested, and some thread using this PID already
1451 : ** has a SHARED or RESERVED lock, then increment reference counts and
1452 : ** return SQLITE_OK.
1453 : */
1454 50 : if( locktype==SHARED_LOCK &&
1455 : (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1456 : assert( locktype==SHARED_LOCK );
1457 : assert( pFile->locktype==0 );
1458 : assert( pLock->cnt>0 );
1459 0 : pFile->locktype = SHARED_LOCK;
1460 0 : pLock->cnt++;
1461 0 : pFile->pOpen->nLock++;
1462 0 : goto end_lock;
1463 : }
1464 :
1465 50 : lock.l_len = 1L;
1466 :
1467 50 : lock.l_whence = SEEK_SET;
1468 :
1469 : /* A PENDING lock is needed before acquiring a SHARED lock and before
1470 : ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1471 : ** be released.
1472 : */
1473 50 : if( locktype==SHARED_LOCK
1474 : || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
1475 : ){
1476 35 : lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
1477 35 : lock.l_start = PENDING_BYTE;
1478 35 : s = fcntl(pFile->h, F_SETLK, &lock);
1479 35 : if( s==(-1) ){
1480 0 : rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1481 0 : goto end_lock;
1482 : }
1483 : }
1484 :
1485 :
1486 : /* If control gets to this point, then actually go ahead and make
1487 : ** operating system calls for the specified lock.
1488 : */
1489 50 : if( locktype==SHARED_LOCK ){
1490 : assert( pLock->cnt==0 );
1491 : assert( pLock->locktype==0 );
1492 :
1493 : /* Now get the read-lock */
1494 21 : lock.l_start = SHARED_FIRST;
1495 21 : lock.l_len = SHARED_SIZE;
1496 21 : s = fcntl(pFile->h, F_SETLK, &lock);
1497 :
1498 : /* Drop the temporary PENDING lock */
1499 21 : lock.l_start = PENDING_BYTE;
1500 21 : lock.l_len = 1L;
1501 21 : lock.l_type = F_UNLCK;
1502 21 : if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
1503 0 : rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1504 0 : goto end_lock;
1505 : }
1506 21 : if( s==(-1) ){
1507 0 : rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1508 : }else{
1509 21 : pFile->locktype = SHARED_LOCK;
1510 21 : pFile->pOpen->nLock++;
1511 21 : pLock->cnt = 1;
1512 : }
1513 29 : }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1514 : /* We are trying for an exclusive lock but another thread in this
1515 : ** same process is still holding a shared lock. */
1516 0 : rc = SQLITE_BUSY;
1517 : }else{
1518 : /* The request was for a RESERVED or EXCLUSIVE lock. It is
1519 : ** assumed that there is a SHARED or greater lock on the file
1520 : ** already.
1521 : */
1522 : assert( 0!=pFile->locktype );
1523 29 : lock.l_type = F_WRLCK;
1524 29 : switch( locktype ){
1525 : case RESERVED_LOCK:
1526 15 : lock.l_start = RESERVED_BYTE;
1527 15 : break;
1528 : case EXCLUSIVE_LOCK:
1529 14 : lock.l_start = SHARED_FIRST;
1530 14 : lock.l_len = SHARED_SIZE;
1531 : break;
1532 : default:
1533 : assert(0);
1534 : }
1535 29 : s = fcntl(pFile->h, F_SETLK, &lock);
1536 29 : if( s==(-1) ){
1537 0 : rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1538 : }
1539 : }
1540 :
1541 50 : if( rc==SQLITE_OK ){
1542 50 : pFile->locktype = locktype;
1543 50 : pLock->locktype = locktype;
1544 0 : }else if( locktype==EXCLUSIVE_LOCK ){
1545 0 : pFile->locktype = PENDING_LOCK;
1546 0 : pLock->locktype = PENDING_LOCK;
1547 : }
1548 :
1549 50 : end_lock:
1550 50 : sqlite3OsLeaveMutex();
1551 : OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
1552 : rc==SQLITE_OK ? "ok" : "failed");
1553 50 : return rc;
1554 : }
1555 :
1556 : /*
1557 : ** Lower the locking level on file descriptor pFile to locktype. locktype
1558 : ** must be either NO_LOCK or SHARED_LOCK.
1559 : **
1560 : ** If the locking level of the file descriptor is already at or below
1561 : ** the requested locking level, this routine is a no-op.
1562 : */
1563 77 : static int unixUnlock(OsFile *id, int locktype){
1564 : struct lockInfo *pLock;
1565 : struct flock lock;
1566 77 : int rc = SQLITE_OK;
1567 77 : unixFile *pFile = (unixFile*)id;
1568 :
1569 : assert( pFile );
1570 : OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
1571 : pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
1572 :
1573 : assert( locktype<=SHARED_LOCK );
1574 77 : if( pFile->locktype<=locktype ){
1575 41 : return SQLITE_OK;
1576 : }
1577 : if( CHECK_THREADID(pFile) ){
1578 : return SQLITE_MISUSE;
1579 : }
1580 36 : sqlite3OsEnterMutex();
1581 36 : pLock = pFile->pLock;
1582 : assert( pLock->cnt!=0 );
1583 36 : if( pFile->locktype>SHARED_LOCK ){
1584 : assert( pLock->locktype==pFile->locktype );
1585 15 : if( locktype==SHARED_LOCK ){
1586 15 : lock.l_type = F_RDLCK;
1587 15 : lock.l_whence = SEEK_SET;
1588 15 : lock.l_start = SHARED_FIRST;
1589 15 : lock.l_len = SHARED_SIZE;
1590 15 : if( fcntl(pFile->h, F_SETLK, &lock)==(-1) ){
1591 : /* This should never happen */
1592 0 : rc = SQLITE_IOERR_RDLOCK;
1593 : }
1594 : }
1595 15 : lock.l_type = F_UNLCK;
1596 15 : lock.l_whence = SEEK_SET;
1597 15 : lock.l_start = PENDING_BYTE;
1598 15 : lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
1599 15 : if( fcntl(pFile->h, F_SETLK, &lock)!=(-1) ){
1600 15 : pLock->locktype = SHARED_LOCK;
1601 : }else{
1602 0 : rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1603 : }
1604 : }
1605 36 : if( locktype==NO_LOCK ){
1606 : struct openCnt *pOpen;
1607 :
1608 : /* Decrement the shared lock counter. Release the lock using an
1609 : ** OS call only when all threads in this same process have released
1610 : ** the lock.
1611 : */
1612 21 : pLock->cnt--;
1613 21 : if( pLock->cnt==0 ){
1614 21 : lock.l_type = F_UNLCK;
1615 21 : lock.l_whence = SEEK_SET;
1616 21 : lock.l_start = lock.l_len = 0L;
1617 21 : if( fcntl(pFile->h, F_SETLK, &lock)!=(-1) ){
1618 21 : pLock->locktype = NO_LOCK;
1619 : }else{
1620 0 : rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1621 : }
1622 : }
1623 :
1624 : /* Decrement the count of locks against this same file. When the
1625 : ** count reaches zero, close any other file descriptors whose close
1626 : ** was deferred because of outstanding locks.
1627 : */
1628 21 : pOpen = pFile->pOpen;
1629 21 : pOpen->nLock--;
1630 : assert( pOpen->nLock>=0 );
1631 21 : if( pOpen->nLock==0 && pOpen->nPending>0 ){
1632 : int i;
1633 0 : for(i=0; i<pOpen->nPending; i++){
1634 0 : close(pOpen->aPending[i]);
1635 : }
1636 0 : free(pOpen->aPending);
1637 0 : pOpen->nPending = 0;
1638 0 : pOpen->aPending = 0;
1639 : }
1640 : }
1641 36 : sqlite3OsLeaveMutex();
1642 36 : pFile->locktype = locktype;
1643 36 : return rc;
1644 : }
1645 :
1646 : /*
1647 : ** Close a file.
1648 : */
1649 28 : static int unixClose(OsFile **pId){
1650 28 : unixFile *id = (unixFile*)*pId;
1651 :
1652 28 : if( !id ) return SQLITE_OK;
1653 28 : unixUnlock(*pId, NO_LOCK);
1654 28 : if( id->dirfd>=0 ) close(id->dirfd);
1655 28 : id->dirfd = -1;
1656 28 : sqlite3OsEnterMutex();
1657 :
1658 28 : if( id->pOpen->nLock ){
1659 : /* If there are outstanding locks, do not actually close the file just
1660 : ** yet because that would clear those locks. Instead, add the file
1661 : ** descriptor to pOpen->aPending. It will be automatically closed when
1662 : ** the last lock is cleared.
1663 : */
1664 : int *aNew;
1665 0 : struct openCnt *pOpen = id->pOpen;
1666 0 : aNew = realloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
1667 0 : if( aNew==0 ){
1668 : /* If a malloc fails, just leak the file descriptor */
1669 : }else{
1670 0 : pOpen->aPending = aNew;
1671 0 : pOpen->aPending[pOpen->nPending] = id->h;
1672 0 : pOpen->nPending++;
1673 : }
1674 : }else{
1675 : /* There are no outstanding locks so we can close the file immediately */
1676 28 : close(id->h);
1677 : }
1678 28 : releaseLockInfo(id->pLock);
1679 28 : releaseOpenCnt(id->pOpen);
1680 :
1681 28 : sqlite3OsLeaveMutex();
1682 28 : id->isOpen = 0;
1683 : OSTRACE2("CLOSE %-3d\n", id->h);
1684 : OpenCounter(-1);
1685 28 : sqlite3ThreadSafeFree(id);
1686 28 : *pId = 0;
1687 28 : return SQLITE_OK;
1688 : }
1689 :
1690 :
1691 : #ifdef SQLITE_ENABLE_LOCKING_STYLE
1692 : #pragma mark AFP Support
1693 :
/*
** The afpLockingContext structure contains all afp lock specific state.
** The AFP locking routines obtain it by casting the lockingContext
** field of a unixFile.
*/
typedef struct afpLockingContext afpLockingContext;
struct afpLockingContext {
  unsigned long long sharedLockByte;  /* NOTE(review): presumably the byte
                                      ** of the shared range locked by this
                                      ** handle; not used in the code
                                      ** visible here - confirm */
  char *filePath;                     /* Path passed to _AFPFSSetLock(),
                                      ** which hands it to fsctl() */
};
1702 :
/*
** Parameter block for the afpfsByteRangeLock2FSCTL request.
** _AFPFSSetLock() fills one of these in and passes its address to
** fsctl().  NOTE(review): the layout presumably has to match what the
** AFP file system expects, so fields should not be reordered - confirm.
*/
struct ByteRangeLockPB2
{
  unsigned long long offset;        /* offset to first byte to lock */
  unsigned long long length;        /* nbr of bytes to lock */
  unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
  unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
  unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
  int fd;                           /* file desc to assoc this lock with */
};

/* fsctl() request code used by _AFPFSSetLock() for byte-range locking */
#define afpfsByteRangeLock2FSCTL        _IOWR('z', 23, struct ByteRangeLockPB2)
1714 :
1715 : /* return 0 on success, 1 on failure. To match the behavior of the
1716 : normal posix file locking (used in unixLock for example), we should
1717 : provide 'richer' return codes - specifically to differentiate between
1718 : 'file busy' and 'file system error' results */
1719 : static int _AFPFSSetLock(const char *path, int fd, unsigned long long offset,
1720 : unsigned long long length, int setLockFlag)
1721 : {
1722 : struct ByteRangeLockPB2 pb;
1723 : int err;
1724 :
1725 : pb.unLockFlag = setLockFlag ? 0 : 1;
1726 : pb.startEndFlag = 0;
1727 : pb.offset = offset;
1728 : pb.length = length;
1729 : pb.fd = fd;
1730 : OSTRACE5("AFPLOCK setting lock %s for %d in range %llx:%llx\n",
1731 : (setLockFlag?"ON":"OFF"), fd, offset, length);
1732 : err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
1733 : if ( err==-1 ) {
1734 : OSTRACE4("AFPLOCK failed to fsctl() '%s' %d %s\n", path, errno,
1735 : strerror(errno));
1736 : return 1; // error
1737 : } else {
1738 : return 0;
1739 : }
1740 : }
1741 :
1742 : /*
1743 : ** This routine checks if there is a RESERVED lock held on the specified
1744 : ** file by this or any other process. If such a lock is held, return
1745 : ** non-zero. If the file is unlocked or holds only SHARED locks, then
1746 : ** return zero.
1747 : */
1748 : static int afpUnixCheckReservedLock(OsFile *id){
1749 : int r = 0;
1750 : unixFile *pFile = (unixFile*)id;
1751 :
1752 : assert( pFile );
1753 : afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1754 :
1755 : /* Check if a thread in this process holds such a lock */
1756 : if( pFile->locktype>SHARED_LOCK ){
1757 : r = 1;
1758 : }
1759 :
1760 : /* Otherwise see if some other process holds it.
1761 : */
1762 : if ( !r ) {
1763 : // lock the byte
1764 : int failed = _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1,1);
1765 : if (failed) {
1766 : /* if we failed to get the lock then someone else must have it */
1767 : r = 1;
1768 : } else {
1769 : /* if we succeeded in taking the reserved lock, unlock it to restore
1770 : ** the original state */
1771 : _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1, 0);
1772 : }
1773 : }
1774 : OSTRACE3("TEST WR-LOCK %d %d\n", pFile->h, r);
1775 :
1776 : return r;
1777 : }
1778 :
/* AFP-style locking following the behavior of unixLock, see the unixLock
** function comments for details of lock management.
**
** Raise the lock held on the file to locktype (SHARED_LOCK, RESERVED_LOCK
** or EXCLUSIVE_LOCK; PENDING_LOCK may not be requested directly).  Locks
** are taken with _AFPFSSetLock() on the PENDING_BYTE, the RESERVED_BYTE, one
** randomly chosen byte of the shared range (SHARED lock) or the whole
** shared range (EXCLUSIVE lock).
**
** Returns SQLITE_OK on success (also when the requested level is already
** held), SQLITE_BUSY when a byte or range could not be locked, the error
** from transferOwnership() if that fails, and SQLITE_IOERR_UNLOCK or
** SQLITE_IOERR_RDLOCK if one of the "should never happen" lock
** manipulations fails.
*/
static int afpUnixLock(OsFile *id, int locktype)
{
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile*)id;
  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
  int gotPendingLock = 0;   /* NOTE(review): never read in this function */

  assert( pFile );
  OSTRACE5("LOCK %d %s was %s pid=%d\n", pFile->h,
         locktypeName(locktype), locktypeName(pFile->locktype), getpid());
  /* If there is already a lock of this type or more restrictive on the
  ** OsFile, do nothing. Don't use the afp_end_lock: exit path, as
  ** sqlite3OsEnterMutex() hasn't been called yet.
  */
  if( pFile->locktype>=locktype ){
    OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
           locktypeName(locktype));
    return SQLITE_OK;
  }

  /* Make sure the locking sequence is correct
  */
  assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
  assert( locktype!=PENDING_LOCK );
  assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );

  /* This mutex is needed because pFile->pLock is shared across threads
  */
  sqlite3OsEnterMutex();

  /* Make sure the current thread owns the pFile.
  */
  rc = transferOwnership(pFile);
  if( rc!=SQLITE_OK ){
    sqlite3OsLeaveMutex();
    return rc;
  }

  /* A PENDING lock is needed before acquiring a SHARED lock and before
  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
  ** be released.
  */
  if( locktype==SHARED_LOCK
      || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
  ){
    int failed = _AFPFSSetLock(context->filePath, pFile->h,
      PENDING_BYTE, 1, 1);
    if (failed) {
      rc = SQLITE_BUSY;
      goto afp_end_lock;
    }
  }

  /* If control gets to this point, then actually go ahead and make
  ** operating system calls for the specified lock.
  */
  if( locktype==SHARED_LOCK ){
    int lk, failed;
    int tries = 0;   /* NOTE(review): never read in this function */

    /* Now get the read-lock */
    /* note that the quality of the randomness doesn't matter that much */
    lk = random();
    /* Remember which byte of the shared range was chosen; the same byte is
    ** used below and in afpUnixUnlock() to release the lock. */
    context->sharedLockByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
    failed = _AFPFSSetLock(context->filePath, pFile->h,
      SHARED_FIRST+context->sharedLockByte, 1, 1);

    /* Drop the temporary PENDING lock.  This is done before looking at
    ** "failed" so the PENDING byte is released on both outcomes. */
    if (_AFPFSSetLock(context->filePath, pFile->h, PENDING_BYTE, 1, 0)) {
      rc = SQLITE_IOERR_UNLOCK;  /* This should never happen */
      goto afp_end_lock;
    }

    if( failed ){
      rc = SQLITE_BUSY;
    } else {
      pFile->locktype = SHARED_LOCK;
    }
  }else{
    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
    ** assumed that there is a SHARED or greater lock on the file
    ** already.
    */
    int failed = 0;
    assert( 0!=pFile->locktype );
    if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
        /* Acquire a RESERVED lock */
        failed = _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1,1);
    }
    if (!failed && locktype == EXCLUSIVE_LOCK) {
      /* Acquire an EXCLUSIVE lock */

      /* Remove the shared lock before trying the range.  We'll need to
      ** re-establish the shared lock if we can't get the exclusive range.
      */
      if (!_AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST +
                         context->sharedLockByte, 1, 0)) {
        /* now attempt to get the exclusive lock range */
        failed = _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST,
                               SHARED_SIZE, 1);
        if (failed && _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST +
                                    context->sharedLockByte, 1, 1)) {
          rc = SQLITE_IOERR_RDLOCK; /* this should never happen */
        }
      } else {
        /* could not release our own shared byte */
        rc = SQLITE_IOERR_UNLOCK; /* this should never happen */
      }
    }
    /* A plain lock failure (no I/O error recorded above) means "busy" */
    if( failed && rc == SQLITE_OK){
      rc = SQLITE_BUSY;
    }
  }

  /* Record the new level.  A failed EXCLUSIVE request that reaches this
  ** point is recorded as PENDING_LOCK; the "goto afp_end_lock" paths above
  ** skip this update and leave pFile->locktype unchanged. */
  if( rc==SQLITE_OK ){
    pFile->locktype = locktype;
  }else if( locktype==EXCLUSIVE_LOCK ){
    pFile->locktype = PENDING_LOCK;
  }

afp_end_lock:
    sqlite3OsLeaveMutex();
  OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
         rc==SQLITE_OK ? "ok" : "failed");
  return rc;
}
1907 :
1908 : /*
1909 : ** Lower the locking level on file descriptor pFile to locktype. locktype
1910 : ** must be either NO_LOCK or SHARED_LOCK.
1911 : **
1912 : ** If the locking level of the file descriptor is already at or below
1913 : ** the requested locking level, this routine is a no-op.
1914 : */
1915 : static int afpUnixUnlock(OsFile *id, int locktype) {
1916 : struct flock lock;
1917 : int rc = SQLITE_OK;
1918 : unixFile *pFile = (unixFile*)id;
1919 : afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1920 :
1921 : assert( pFile );
1922 : OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
1923 : pFile->locktype, getpid());
1924 :
1925 : assert( locktype<=SHARED_LOCK );
1926 : if( pFile->locktype<=locktype ){
1927 : return SQLITE_OK;
1928 : }
1929 : if( CHECK_THREADID(pFile) ){
1930 : return SQLITE_MISUSE;
1931 : }
1932 : sqlite3OsEnterMutex();
1933 : if( pFile->locktype>SHARED_LOCK ){
1934 : if( locktype==SHARED_LOCK ){
1935 : int failed = 0;
1936 :
1937 : /* unlock the exclusive range - then re-establish the shared lock */
1938 : if (pFile->locktype==EXCLUSIVE_LOCK) {
1939 : failed = _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST,
1940 : SHARED_SIZE, 0);
1941 : if (!failed) {
1942 : /* successfully removed the exclusive lock */
1943 : if (_AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST+
1944 : context->sharedLockByte, 1, 1)) {
1945 : /* failed to re-establish our shared lock */
1946 : rc = SQLITE_IOERR_RDLOCK; /* This should never happen */
1947 : }
1948 : } else {
1949 : /* This should never happen - failed to unlock the exclusive range */
1950 : rc = SQLITE_IOERR_UNLOCK;
1951 : }
1952 : }
1953 : }
1954 : if (rc == SQLITE_OK && pFile->locktype>=PENDING_LOCK) {
1955 : if (_AFPFSSetLock(context->filePath, pFile->h, PENDING_BYTE, 1, 0)){
1956 : /* failed to release the pending lock */
1957 : rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1958 : }
1959 : }
1960 : if (rc == SQLITE_OK && pFile->locktype>=RESERVED_LOCK) {
1961 : if (_AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1, 0)) {
1962 : /* failed to release the reserved lock */
1963 : rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1964 : }
1965 : }
1966 : }
1967 : if( locktype==NO_LOCK ){
1968 : int failed = _AFPFSSetLock(context->filePath, pFile->h,
1969 : SHARED_FIRST + context->sharedLockByte, 1, 0);
1970 : if (failed) {
1971 : rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1972 : }
1973 : }
1974 : if (rc == SQLITE_OK)
1975 : pFile->locktype = locktype;
1976 : sqlite3OsLeaveMutex();
1977 : return rc;
1978 : }
1979 :
1980 : /*
1981 : ** Close a file & cleanup AFP specific locking context
1982 : */
1983 : static int afpUnixClose(OsFile **pId) {
1984 : unixFile *id = (unixFile*)*pId;
1985 :
1986 : if( !id ) return SQLITE_OK;
1987 : afpUnixUnlock(*pId, NO_LOCK);
1988 : /* free the AFP locking structure */
1989 : if (id->lockingContext != NULL) {
1990 : if (((afpLockingContext *)id->lockingContext)->filePath != NULL)
1991 : sqlite3ThreadSafeFree(((afpLockingContext*)id->lockingContext)->filePath);
1992 : sqlite3ThreadSafeFree(id->lockingContext);
1993 : }
1994 :
1995 : if( id->dirfd>=0 ) close(id->dirfd);
1996 : id->dirfd = -1;
1997 : close(id->h);
1998 : id->isOpen = 0;
1999 : OSTRACE2("CLOSE %-3d\n", id->h);
2000 : OpenCounter(-1);
2001 : sqlite3ThreadSafeFree(id);
2002 : *pId = 0;
2003 : return SQLITE_OK;
2004 : }
2005 :
2006 :
2007 : #pragma mark flock() style locking
2008 :
/*
** The flockLockingContext is not used: the flock() style locking routines
** below work from the file descriptor and pFile->locktype alone, so the
** context type is just an alias for void.
*/
typedef void flockLockingContext;
2013 :
2014 : static int flockUnixCheckReservedLock(OsFile *id) {
2015 : unixFile *pFile = (unixFile*)id;
2016 :
2017 : if (pFile->locktype == RESERVED_LOCK) {
2018 : return 1; // already have a reserved lock
2019 : } else {
2020 : // attempt to get the lock
2021 : int rc = flock(pFile->h, LOCK_EX | LOCK_NB);
2022 : if (!rc) {
2023 : // got the lock, unlock it
2024 : flock(pFile->h, LOCK_UN);
2025 : return 0; // no one has it reserved
2026 : }
2027 : return 1; // someone else might have it reserved
2028 : }
2029 : }
2030 :
2031 : static int flockUnixLock(OsFile *id, int locktype) {
2032 : unixFile *pFile = (unixFile*)id;
2033 :
2034 : // if we already have a lock, it is exclusive.
2035 : // Just adjust level and punt on outta here.
2036 : if (pFile->locktype > NO_LOCK) {
2037 : pFile->locktype = locktype;
2038 : return SQLITE_OK;
2039 : }
2040 :
2041 : // grab an exclusive lock
2042 : int rc = flock(pFile->h, LOCK_EX | LOCK_NB);
2043 : if (rc) {
2044 : // didn't get, must be busy
2045 : return SQLITE_BUSY;
2046 : } else {
2047 : // got it, set the type and return ok
2048 : pFile->locktype = locktype;
2049 : return SQLITE_OK;
2050 : }
2051 : }
2052 :
2053 : static int flockUnixUnlock(OsFile *id, int locktype) {
2054 : unixFile *pFile = (unixFile*)id;
2055 :
2056 : assert( locktype<=SHARED_LOCK );
2057 :
2058 : // no-op if possible
2059 : if( pFile->locktype==locktype ){
2060 : return SQLITE_OK;
2061 : }
2062 :
2063 : // shared can just be set because we always have an exclusive
2064 : if (locktype==SHARED_LOCK) {
2065 : pFile->locktype = locktype;
2066 : return SQLITE_OK;
2067 : }
2068 :
2069 : // no, really, unlock.
2070 : int rc = flock(pFile->h, LOCK_UN);
2071 : if (rc)
2072 : return SQLITE_IOERR_UNLOCK;
2073 : else {
2074 : pFile->locktype = NO_LOCK;
2075 : return SQLITE_OK;
2076 : }
2077 : }
2078 :
2079 : /*
2080 : ** Close a file.
2081 : */
2082 : static int flockUnixClose(OsFile **pId) {
2083 : unixFile *id = (unixFile*)*pId;
2084 :
2085 : if( !id ) return SQLITE_OK;
2086 : flockUnixUnlock(*pId, NO_LOCK);
2087 :
2088 : if( id->dirfd>=0 ) close(id->dirfd);
2089 : id->dirfd = -1;
2090 : sqlite3OsEnterMutex();
2091 :
2092 : close(id->h);
2093 : sqlite3OsLeaveMutex();
2094 : id->isOpen = 0;
2095 : OSTRACE2("CLOSE %-3d\n", id->h);
2096 : OpenCounter(-1);
2097 : sqlite3ThreadSafeFree(id);
2098 : *pId = 0;
2099 : return SQLITE_OK;
2100 : }
2101 :
2102 : #pragma mark Old-School .lock file based locking
2103 :
/*
** The dotlockLockingContext structure contains all dotlock (.lock) lock
** specific state
*/
typedef struct dotlockLockingContext dotlockLockingContext;
struct dotlockLockingContext {
  char *lockPath;   /* Name of the lock file: dotlockUnixLock() creates it to
                    ** take the lock, dotlockUnixUnlock() unlinks it */
};
2112 :
2113 :
2114 : static int dotlockUnixCheckReservedLock(OsFile *id) {
2115 : unixFile *pFile = (unixFile*)id;
2116 : dotlockLockingContext *context =
2117 : (dotlockLockingContext *) pFile->lockingContext;
2118 :
2119 : if (pFile->locktype == RESERVED_LOCK) {
2120 : return 1; // already have a reserved lock
2121 : } else {
2122 : struct stat statBuf;
2123 : if (lstat(context->lockPath,&statBuf) == 0)
2124 : // file exists, someone else has the lock
2125 : return 1;
2126 : else
2127 : // file does not exist, we could have it if we want it
2128 : return 0;
2129 : }
2130 : }
2131 :
2132 : static int dotlockUnixLock(OsFile *id, int locktype) {
2133 : unixFile *pFile = (unixFile*)id;
2134 : dotlockLockingContext *context =
2135 : (dotlockLockingContext *) pFile->lockingContext;
2136 :
2137 : // if we already have a lock, it is exclusive.
2138 : // Just adjust level and punt on outta here.
2139 : if (pFile->locktype > NO_LOCK) {
2140 : pFile->locktype = locktype;
2141 :
2142 : /* Always update the timestamp on the old file */
2143 : utimes(context->lockPath,NULL);
2144 : return SQLITE_OK;
2145 : }
2146 :
2147 : // check to see if lock file already exists
2148 : struct stat statBuf;
2149 : if (lstat(context->lockPath,&statBuf) == 0){
2150 : return SQLITE_BUSY; // it does, busy
2151 : }
2152 :
2153 : // grab an exclusive lock
2154 : int fd = open(context->lockPath,O_RDONLY|O_CREAT|O_EXCL,0600);
2155 : if (fd < 0) {
2156 : // failed to open/create the file, someone else may have stolen the lock
2157 : return SQLITE_BUSY;
2158 : }
2159 : close(fd);
2160 :
2161 : // got it, set the type and return ok
2162 : pFile->locktype = locktype;
2163 : return SQLITE_OK;
2164 : }
2165 :
2166 : static int dotlockUnixUnlock(OsFile *id, int locktype) {
2167 : unixFile *pFile = (unixFile*)id;
2168 : dotlockLockingContext *context =
2169 : (dotlockLockingContext *) pFile->lockingContext;
2170 :
2171 : assert( locktype<=SHARED_LOCK );
2172 :
2173 : // no-op if possible
2174 : if( pFile->locktype==locktype ){
2175 : return SQLITE_OK;
2176 : }
2177 :
2178 : // shared can just be set because we always have an exclusive
2179 : if (locktype==SHARED_LOCK) {
2180 : pFile->locktype = locktype;
2181 : return SQLITE_OK;
2182 : }
2183 :
2184 : // no, really, unlock.
2185 : unlink(context->lockPath);
2186 : pFile->locktype = NO_LOCK;
2187 : return SQLITE_OK;
2188 : }
2189 :
2190 : /*
2191 : ** Close a file.
2192 : */
2193 : static int dotlockUnixClose(OsFile **pId) {
2194 : unixFile *id = (unixFile*)*pId;
2195 :
2196 : if( !id ) return SQLITE_OK;
2197 : dotlockUnixUnlock(*pId, NO_LOCK);
2198 : /* free the dotlock locking structure */
2199 : if (id->lockingContext != NULL) {
2200 : if (((dotlockLockingContext *)id->lockingContext)->lockPath != NULL)
2201 : sqlite3ThreadSafeFree( ( (dotlockLockingContext *)
2202 : id->lockingContext)->lockPath);
2203 : sqlite3ThreadSafeFree(id->lockingContext);
2204 : }
2205 :
2206 : if( id->dirfd>=0 ) close(id->dirfd);
2207 : id->dirfd = -1;
2208 : sqlite3OsEnterMutex();
2209 :
2210 : close(id->h);
2211 :
2212 : sqlite3OsLeaveMutex();
2213 : id->isOpen = 0;
2214 : OSTRACE2("CLOSE %-3d\n", id->h);
2215 : OpenCounter(-1);
2216 : sqlite3ThreadSafeFree(id);
2217 : *pId = 0;
2218 : return SQLITE_OK;
2219 : }
2220 :
2221 :
2222 : #pragma mark No locking
2223 :
/*
** The nolockLockingContext is void: the no-locking routines below keep no
** state at all.
*/
typedef void nolockLockingContext;
2228 :
2229 : static int nolockUnixCheckReservedLock(OsFile *id) {
2230 : return 0;
2231 : }
2232 :
2233 : static int nolockUnixLock(OsFile *id, int locktype) {
2234 : return SQLITE_OK;
2235 : }
2236 :
2237 : static int nolockUnixUnlock(OsFile *id, int locktype) {
2238 : return SQLITE_OK;
2239 : }
2240 :
2241 : /*
2242 : ** Close a file.
2243 : */
2244 : static int nolockUnixClose(OsFile **pId) {
2245 : unixFile *id = (unixFile*)*pId;
2246 :
2247 : if( !id ) return SQLITE_OK;
2248 : if( id->dirfd>=0 ) close(id->dirfd);
2249 : id->dirfd = -1;
2250 : sqlite3OsEnterMutex();
2251 :
2252 : close(id->h);
2253 :
2254 : sqlite3OsLeaveMutex();
2255 : id->isOpen = 0;
2256 : OSTRACE2("CLOSE %-3d\n", id->h);
2257 : OpenCounter(-1);
2258 : sqlite3ThreadSafeFree(id);
2259 : *pId = 0;
2260 : return SQLITE_OK;
2261 : }
2262 :
2263 : #endif /* SQLITE_ENABLE_LOCKING_STYLE */
2264 :
/*
** Turn a relative pathname into a full pathname.  Return a pointer
** to the full pathname stored in space obtained from sqliteMalloc().
** The calling function is responsible for freeing this space once it
** is no longer needed.
**
** A name that already starts with '/' is copied unchanged; any other name
** is prefixed with the current working directory and a '/'.
**
** Returns 0 if the scratch buffer cannot be allocated or if getcwd()
** fails.  The getcwd() result must be checked before it is handed to
** sqlite3SetString(): that routine takes a NULL-terminated argument list,
** so a NULL from getcwd() would end the list early and silently drop
** zRelative from the result.
*/
char *sqlite3UnixFullPathname(const char *zRelative){
  const int nBuf = 5000;      /* Size of the scratch buffer for getcwd() */
  char *zFull = 0;
  if( zRelative[0]=='/' ){
    sqlite3SetString(&zFull, zRelative, (char*)0);
  }else{
    char *zBuf = sqliteMalloc(nBuf);
    if( zBuf==0 ){
      return 0;
    }
    if( getcwd(zBuf, nBuf)==0 ){
      /* Current directory unknown: report failure instead of building a
      ** bogus name. */
      sqliteFree(zBuf);
      return 0;
    }
    sqlite3SetString(&zFull, zBuf, "/", zRelative, (char*)0);
    sqliteFree(zBuf);
  }

#if 0
  /*
  ** Remove "/./" path elements and convert "/A/./" path elements
  ** to just "/".
  */
  if( zFull ){
    int i, j;
    for(i=j=0; zFull[i]; i++){
      if( zFull[i]=='/' ){
        if( zFull[i+1]=='/' ) continue;
        if( zFull[i+1]=='.' && zFull[i+2]=='/' ){
          i += 1;
          continue;
        }
        if( zFull[i+1]=='.' && zFull[i+2]=='.' && zFull[i+3]=='/' ){
          while( j>0 && zFull[j-1]!='/' ){ j--; }
          i += 3;
          continue;
        }
      }
      zFull[j++] = zFull[i];
    }
    zFull[j] = 0;
  }
#endif

  return zFull;
}
2314 :
2315 : /*
2316 : ** Change the value of the fullsync flag in the given file descriptor.
2317 : */
2318 30 : static void unixSetFullSync(OsFile *id, int v){
2319 30 : ((unixFile*)id)->fullSync = v;
2320 30 : }
2321 :
2322 : /*
2323 : ** Return the underlying file handle for an OsFile
2324 : */
2325 0 : static int unixFileHandle(OsFile *id){
2326 0 : return ((unixFile*)id)->h;
2327 : }
2328 :
2329 : /*
2330 : ** Return an integer that indices the type of lock currently held
2331 : ** by this handle. (Used for testing and analysis only.)
2332 : */
2333 0 : static int unixLockState(OsFile *id){
2334 0 : return ((unixFile*)id)->locktype;
2335 : }
2336 :
2337 : /*
2338 : ** Return the sector size in bytes of the underlying block device for
2339 : ** the specified file. This is almost always 512 bytes, but may be
2340 : ** larger for some devices.
2341 : **
2342 : ** SQLite code assumes this function cannot fail. It also assumes that
2343 : ** if two files are created in the same file-system directory (i.e.
2344 : ** a database and it's journal file) that the sector size will be the
2345 : ** same for both.
2346 : */
2347 14 : static int unixSectorSize(OsFile *id){
2348 14 : return SQLITE_DEFAULT_SECTOR_SIZE;
2349 : }
2350 :
/*
** This vector defines all the methods that can operate on an OsFile
** for unix.  allocateUnixFile() installs it for files that use posix
** advisory locking (the only style when SQLITE_ENABLE_LOCKING_STYLE is
** not defined).  The entries are positional, so their order must match
** the declaration of IoMethod.
*/
static const IoMethod sqlite3UnixIoMethod = {
  unixClose,
  unixOpenDirectory,
  unixRead,
  unixWrite,
  unixSeek,
  unixTruncate,
  unixSync,
  unixSetFullSync,
  unixFileHandle,
  unixFileSize,
  unixLock,
  unixUnlock,
  unixLockState,
  unixCheckReservedLock,
  unixSectorSize,
};
2372 :
2373 : #ifdef SQLITE_ENABLE_LOCKING_STYLE
/*
** This vector defines all the methods that can operate on an OsFile
** for unix with AFP style file locking.  It differs from
** sqlite3UnixIoMethod only in the close, lock, unlock and
** check-reserved-lock entries.
*/
static const IoMethod sqlite3AFPLockingUnixIoMethod = {
    afpUnixClose,               /* AFP specific */
    unixOpenDirectory,
    unixRead,
    unixWrite,
    unixSeek,
    unixTruncate,
    unixSync,
    unixSetFullSync,
    unixFileHandle,
    unixFileSize,
    afpUnixLock,                /* AFP specific */
    afpUnixUnlock,              /* AFP specific */
    unixLockState,
    afpUnixCheckReservedLock,   /* AFP specific */
    unixSectorSize,
};
2395 :
/*
** This vector defines all the methods that can operate on an OsFile
** for unix with flock() style file locking.  It differs from
** sqlite3UnixIoMethod only in the close, lock, unlock and
** check-reserved-lock entries.
*/
static const IoMethod sqlite3FlockLockingUnixIoMethod = {
    flockUnixClose,               /* flock() specific */
    unixOpenDirectory,
    unixRead,
    unixWrite,
    unixSeek,
    unixTruncate,
    unixSync,
    unixSetFullSync,
    unixFileHandle,
    unixFileSize,
    flockUnixLock,                /* flock() specific */
    flockUnixUnlock,              /* flock() specific */
    unixLockState,
    flockUnixCheckReservedLock,   /* flock() specific */
    unixSectorSize,
};
2417 :
/*
** This vector defines all the methods that can operate on an OsFile
** for unix with dotlock style file locking.  It differs from
** sqlite3UnixIoMethod only in the close, lock, unlock and
** check-reserved-lock entries.
*/
static const IoMethod sqlite3DotlockLockingUnixIoMethod = {
    dotlockUnixClose,               /* dotlock specific */
    unixOpenDirectory,
    unixRead,
    unixWrite,
    unixSeek,
    unixTruncate,
    unixSync,
    unixSetFullSync,
    unixFileHandle,
    unixFileSize,
    dotlockUnixLock,                /* dotlock specific */
    dotlockUnixUnlock,              /* dotlock specific */
    unixLockState,
    dotlockUnixCheckReservedLock,   /* dotlock specific */
    unixSectorSize,
};
2439 :
/*
** This vector defines all the methods that can operate on an OsFile
** for unix with file locking disabled.  It differs from
** sqlite3UnixIoMethod only in the close, lock, unlock and
** check-reserved-lock entries, which are the do-nothing nolock routines.
*/
static const IoMethod sqlite3NolockLockingUnixIoMethod = {
    nolockUnixClose,               /* nolock specific */
    unixOpenDirectory,
    unixRead,
    unixWrite,
    unixSeek,
    unixTruncate,
    unixSync,
    unixSetFullSync,
    unixFileHandle,
    unixFileSize,
    nolockUnixLock,                /* nolock specific */
    nolockUnixUnlock,              /* nolock specific */
    unixLockState,
    nolockUnixCheckReservedLock,   /* nolock specific */
    unixSectorSize,
};
2461 :
2462 : #endif /* SQLITE_ENABLE_LOCKING_STYLE */
2463 :
2464 : /*
2465 : ** Allocate memory for a new unixFile and initialize that unixFile.
2466 : ** Write a pointer to the new unixFile into *pId.
2467 : ** If we run out of memory, close the file and return an error.
2468 : */
2469 : #ifdef SQLITE_ENABLE_LOCKING_STYLE
2470 : /*
2471 : ** When locking extensions are enabled, the filepath and locking style
2472 : ** are needed to determine the unixFile pMethod to use for locking operations.
2473 : ** The locking-style specific lockingContext data structure is created
2474 : ** and assigned here also.
2475 : */
2476 : static int allocateUnixFile(
2477 : int h, /* Open file descriptor of file being opened */
2478 : OsFile **pId, /* Write completed initialization here */
2479 : const char *zFilename, /* Name of the file being opened */
2480 : int delFlag /* Delete-on-or-before-close flag */
2481 : ){
2482 : sqlite3LockingStyle lockingStyle;
2483 : unixFile *pNew;
2484 : unixFile f;
2485 : int rc;
2486 :
2487 : memset(&f, 0, sizeof(f));
2488 : lockingStyle = sqlite3DetectLockingStyle(zFilename, h);
2489 : if ( lockingStyle == posixLockingStyle ) {
2490 : sqlite3OsEnterMutex();
2491 : rc = findLockInfo(h, &f.pLock, &f.pOpen);
2492 : sqlite3OsLeaveMutex();
2493 : if( rc ){
2494 : close(h);
2495 : unlink(zFilename);
2496 : return SQLITE_NOMEM;
2497 : }
2498 : } else {
2499 : // pLock and pOpen are only used for posix advisory locking
2500 : f.pLock = NULL;
2501 : f.pOpen = NULL;
2502 : }
2503 : if( delFlag ){
2504 : unlink(zFilename);
2505 : }
2506 : f.dirfd = -1;
2507 : f.h = h;
2508 : SET_THREADID(&f);
2509 : pNew = sqlite3ThreadSafeMalloc( sizeof(unixFile) );
2510 : if( pNew==0 ){
2511 : close(h);
2512 : sqlite3OsEnterMutex();
2513 : releaseLockInfo(f.pLock);
2514 : releaseOpenCnt(f.pOpen);
2515 : sqlite3OsLeaveMutex();
2516 : *pId = 0;
2517 : return SQLITE_NOMEM;
2518 : }else{
2519 : *pNew = f;
2520 : switch(lockingStyle) {
2521 : case afpLockingStyle:
2522 : /* afp locking uses the file path so it needs to be included in
2523 : ** the afpLockingContext */
2524 : pNew->pMethod = &sqlite3AFPLockingUnixIoMethod;
2525 : pNew->lockingContext =
2526 : sqlite3ThreadSafeMalloc(sizeof(afpLockingContext));
2527 : ((afpLockingContext *)pNew->lockingContext)->filePath =
2528 : sqlite3ThreadSafeMalloc(strlen(zFilename) + 1);
2529 : strcpy(((afpLockingContext *)pNew->lockingContext)->filePath,
2530 : zFilename);
2531 : srandomdev();
2532 : break;
2533 : case flockLockingStyle:
2534 : /* flock locking doesn't need additional lockingContext information */
2535 : pNew->pMethod = &sqlite3FlockLockingUnixIoMethod;
2536 : break;
2537 : case dotlockLockingStyle:
2538 : /* dotlock locking uses the file path so it needs to be included in
2539 : ** the dotlockLockingContext */
2540 : pNew->pMethod = &sqlite3DotlockLockingUnixIoMethod;
2541 : pNew->lockingContext = sqlite3ThreadSafeMalloc(
2542 : sizeof(dotlockLockingContext));
2543 : ((dotlockLockingContext *)pNew->lockingContext)->lockPath =
2544 : sqlite3ThreadSafeMalloc(strlen(zFilename) + strlen(".lock") + 1);
2545 : sprintf(((dotlockLockingContext *)pNew->lockingContext)->lockPath,
2546 : "%s.lock", zFilename);
2547 : break;
2548 : case posixLockingStyle:
2549 : /* posix locking doesn't need additional lockingContext information */
2550 : pNew->pMethod = &sqlite3UnixIoMethod;
2551 : break;
2552 : case noLockingStyle:
2553 : case unsupportedLockingStyle:
2554 : default:
2555 : pNew->pMethod = &sqlite3NolockLockingUnixIoMethod;
2556 : }
2557 : *pId = (OsFile*)pNew;
2558 : OpenCounter(+1);
2559 : return SQLITE_OK;
2560 : }
2561 : }
2562 : #else /* SQLITE_ENABLE_LOCKING_STYLE */
2563 : static int allocateUnixFile(
2564 : int h, /* Open file descriptor on file being opened */
2565 : OsFile **pId, /* Write the resul unixFile structure here */
2566 : const char *zFilename, /* Name of the file being opened */
2567 : int delFlag /* If true, delete the file on or before closing */
2568 28 : ){
2569 : unixFile *pNew;
2570 : unixFile f;
2571 : int rc;
2572 :
2573 28 : memset(&f, 0, sizeof(f));
2574 28 : sqlite3OsEnterMutex();
2575 28 : rc = findLockInfo(h, &f.pLock, &f.pOpen);
2576 28 : sqlite3OsLeaveMutex();
2577 28 : if( delFlag ){
2578 0 : unlink(zFilename);
2579 : }
2580 28 : if( rc ){
2581 0 : close(h);
2582 0 : return SQLITE_NOMEM;
2583 : }
2584 : OSTRACE3("OPEN %-3d %s\n", h, zFilename);
2585 28 : f.dirfd = -1;
2586 28 : f.h = h;
2587 : SET_THREADID(&f);
2588 28 : pNew = sqlite3ThreadSafeMalloc( sizeof(unixFile) );
2589 28 : if( pNew==0 ){
2590 0 : close(h);
2591 0 : sqlite3OsEnterMutex();
2592 0 : releaseLockInfo(f.pLock);
2593 0 : releaseOpenCnt(f.pOpen);
2594 0 : sqlite3OsLeaveMutex();
2595 0 : *pId = 0;
2596 0 : return SQLITE_NOMEM;
2597 : }else{
2598 28 : *pNew = f;
2599 28 : pNew->pMethod = &sqlite3UnixIoMethod;
2600 28 : *pId = (OsFile*)pNew;
2601 : OpenCounter(+1);
2602 28 : return SQLITE_OK;
2603 : }
2604 : }
2605 : #endif /* SQLITE_ENABLE_LOCKING_STYLE */
2606 :
2607 : #endif /* SQLITE_OMIT_DISKIO */
2608 : /***************************************************************************
2609 : ** Everything above deals with file I/O. Everything that follows deals
** with other miscellaneous aspects of the operating system interface
2611 : ****************************************************************************/
2612 :
2613 :
2614 : #ifndef SQLITE_OMIT_LOAD_EXTENSION
2615 : /*
2616 : ** Interfaces for opening a shared library, finding entry points
2617 : ** within the shared library, and closing the shared library.
2618 : */
2619 : #include <dlfcn.h>
/* Open the shared library zFilename with immediate symbol resolution and
** global symbol visibility.  Returns whatever dlopen() returns. */
void *sqlite3UnixDlopen(const char *zFilename){
  const int openMode = RTLD_NOW | RTLD_GLOBAL;
  void *pLib = dlopen(zFilename, openMode);
  return pLib;
}
/* Look up the entry point zSymbol in the library pHandle.  Returns
** whatever dlsym() returns. */
void *sqlite3UnixDlsym(void *pHandle, const char *zSymbol){
  void *pEntry = dlsym(pHandle, zSymbol);
  return pEntry;
}
/* Close the shared library pHandle.  Returns whatever dlclose() returns. */
int sqlite3UnixDlclose(void *pHandle){
  int rc = dlclose(pHandle);
  return rc;
}
2629 : #endif /* SQLITE_OMIT_LOAD_EXTENSION */
2630 :
2631 : /*
2632 : ** Get information to seed the random number generator. The seed
2633 : ** is written into the buffer zBuf[256]. The calling function must
2634 : ** supply a sufficiently large buffer.
2635 : */
2636 4 : int sqlite3UnixRandomSeed(char *zBuf){
2637 : /* We have to initialize zBuf to prevent valgrind from reporting
2638 : ** errors. The reports issued by valgrind are incorrect - we would
2639 : ** prefer that the randomness be increased by making use of the
2640 : ** uninitialized space in zBuf - but valgrind errors tend to worry
2641 : ** some users. Rather than argue, it seems easier just to initialize
2642 : ** the whole array and silence valgrind, even if that means less randomness
2643 : ** in the random seed.
2644 : **
2645 : ** When testing, initializing zBuf[] to zero is all we do. That means
2646 : ** that we always use the same random number sequence. This makes the
2647 : ** tests repeatable.
2648 : */
2649 4 : memset(zBuf, 0, 256);
2650 : #if !defined(SQLITE_TEST)
2651 : {
2652 : int pid, fd;
2653 4 : fd = open("/dev/urandom", O_RDONLY);
2654 4 : if( fd<0 ){
2655 : time_t t;
2656 0 : time(&t);
2657 0 : memcpy(zBuf, &t, sizeof(t));
2658 0 : pid = getpid();
2659 0 : memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
2660 : }else{
2661 4 : read(fd, zBuf, 256);
2662 4 : close(fd);
2663 : }
2664 : }
2665 : #endif
2666 4 : return SQLITE_OK;
2667 : }
2668 :
/*
** Sleep for a little while.  Return the amount of time slept.
** The argument is the number of milliseconds we want to sleep.
**
** With usleep() available the request is honored as given.  Otherwise the
** request is rounded up to whole seconds for sleep(), and the rounded value
** (in milliseconds) is what gets returned.
*/
int sqlite3UnixSleep(int ms){
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  usleep(ms*1000);
  return ms;
#else
  int nSec = (ms+999)/1000;
  sleep(nSec);
  return 1000*nSec;
#endif
}
2682 :
2683 : /*
2684 : ** Static variables used for thread synchronization.
2685 : **
2686 : ** inMutex the nesting depth of the recursive mutex. The thread
2687 : ** holding mutexMain can read this variable at any time.
**              But it must hold mutexAux to change this variable.  Other
2689 : ** threads must hold mutexAux to read the variable and can
2690 : ** never write.
2691 : **
2692 : ** mutexOwner The thread id of the thread holding mutexMain. Same
2693 : ** access rules as for inMutex.
2694 : **
2695 : ** mutexOwnerValid True if the value in mutexOwner is valid. The same
2696 : ** access rules apply as for inMutex.
2697 : **
2698 : ** mutexMain The main mutex. Hold this mutex in order to get exclusive
2699 : ** access to SQLite data structures.
2700 : **
2701 : ** mutexAux An auxiliary mutex needed to access variables defined above.
2702 : **
2703 : ** Mutexes are always acquired in this order: mutexMain mutexAux. It
2704 : ** is not necessary to acquire mutexMain in order to get mutexAux - just
2705 : ** do not attempt to acquire them in the reverse order: mutexAux mutexMain.
2706 : ** Either get the mutexes with mutexMain first or get mutexAux only.
2707 : **
2708 : ** When running on a platform where the three variables inMutex, mutexOwner,
2709 : ** and mutexOwnerValid can be set atomically, the mutexAux is not required.
2710 : ** On many systems, all three are 32-bit integers and writing to a 32-bit
2711 : ** integer is atomic. I think. But there are no guarantees. So it seems
2712 : ** safer to protect them using mutexAux.
2713 : */
static int inMutex = 0;           /* Nesting depth of the recursive mutex */
#ifdef SQLITE_UNIX_THREADS
static pthread_t mutexOwner;          /* Thread holding mutexMain */
static int mutexOwnerValid = 0;       /* True if mutexOwner is valid */
static pthread_mutex_t mutexMain = PTHREAD_MUTEX_INITIALIZER; /* The mutex */
static pthread_mutex_t mutexAux = PTHREAD_MUTEX_INITIALIZER;  /* Aux mutex */
#endif
2721 :
/*
** The following pair of routines implement mutual exclusion for
** multi-threaded processes.  Only a single thread is allowed to
** execute code that is surrounded by EnterMutex() and LeaveMutex().
**
** SQLite uses only a single Mutex.  There is not much critical
** code and what little there is executes quickly and without blocking.
**
** As of version 3.3.2, this mutex must be recursive.
**
** sqlite3UnixEnterMutex() acquires the mutex, or just increments the
** nesting depth inMutex if the calling thread already owns it.  Without
** SQLITE_UNIX_THREADS only the nesting depth is maintained.
*/
void sqlite3UnixEnterMutex(){
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutexAux);
  if( !mutexOwnerValid || !pthread_equal(mutexOwner, pthread_self()) ){
    /* Not the owner: mutexAux is dropped before waiting on mutexMain, so
    ** the two are never taken in the order mutexAux, mutexMain. */
    pthread_mutex_unlock(&mutexAux);
    pthread_mutex_lock(&mutexMain);
    assert( inMutex==0 );
    assert( !mutexOwnerValid );
    /* Now the owner of mutexMain: record that under mutexAux */
    pthread_mutex_lock(&mutexAux);
    mutexOwner = pthread_self();
    mutexOwnerValid = 1;
  }
  /* mutexAux is held here on both paths */
  inMutex++;
  pthread_mutex_unlock(&mutexAux);
#else
  inMutex++;
#endif
}
/*
** Undo one sqlite3UnixEnterMutex(): decrement the nesting depth inMutex
** and, in threaded builds, release mutexMain once the depth reaches zero.
** The caller must currently be inside the mutex (inMutex>0).
*/
void sqlite3UnixLeaveMutex(){
  assert( inMutex>0 );
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutexAux);
  inMutex--;
  assert( pthread_equal(mutexOwner, pthread_self()) );
  if( inMutex==0 ){
    /* Outermost leave: mark the owner invalid before mutexMain is
    ** released, all while still holding mutexAux */
    assert( mutexOwnerValid );
    mutexOwnerValid = 0;
    pthread_mutex_unlock(&mutexMain);
  }
  pthread_mutex_unlock(&mutexAux);
#else
  inMutex--;
#endif
}
2766 :
2767 : /*
2768 : ** Return TRUE if the mutex is currently held.
2769 : **
2770 : ** If the thisThrd parameter is true, return true only if the
2771 : ** calling thread holds the mutex. If the parameter is false, return
2772 : ** true if any thread holds the mutex.
2773 : */
2774 0 : int sqlite3UnixInMutex(int thisThrd){
2775 : #ifdef SQLITE_UNIX_THREADS
2776 : int rc;
2777 : pthread_mutex_lock(&mutexAux);
2778 : rc = inMutex>0 && (thisThrd==0 || pthread_equal(mutexOwner,pthread_self()));
2779 : pthread_mutex_unlock(&mutexAux);
2780 : return rc;
2781 : #else
2782 0 : return inMutex>0;
2783 : #endif
2784 : }
2785 :
2786 : /*
2787 : ** Remember the number of thread-specific-data blocks allocated.
2788 : ** Use this to verify that we are not leaking thread-specific-data.
2789 : ** Ticket #1601
2790 : */
/*
** sqlite3_tsd_count is the number of thread-specific-data blocks that
** are currently allocated (test builds only).  TSD_COUNTER(N) adds N to
** it; in threaded builds the update is serialized by tsd_counter_mutex.
**
** Every variant of TSD_COUNTER() expands to exactly one statement that
** needs a terminating semicolon, so the macro is safe to use as the
** body of an unbraced if/else.  The argument is parenthesized so that
** arbitrary expressions may be passed.
*/
#ifdef SQLITE_TEST
int sqlite3_tsd_count = 0;
# ifdef SQLITE_UNIX_THREADS
static pthread_mutex_t tsd_counter_mutex = PTHREAD_MUTEX_INITIALIZER;
#   define TSD_COUNTER(N) \
      do{ \
        pthread_mutex_lock(&tsd_counter_mutex); \
        sqlite3_tsd_count += (N); \
        pthread_mutex_unlock(&tsd_counter_mutex); \
      }while(0)
# else
#   define TSD_COUNTER(N)  do{ sqlite3_tsd_count += (N); }while(0)
# endif
#else
# define TSD_COUNTER(N)  do{ /* no-op */ }while(0)
#endif
2805 :
2806 : /*
2807 : ** If called with allocateFlag>0, then return a pointer to thread
2808 : ** specific data for the current thread. Allocate and zero the
2809 : ** thread-specific data if it does not already exist.
2810 : **
2811 : ** If called with allocateFlag==0, then check the current thread
2812 : ** specific data. Return it if it exists. If it does not exist,
2813 : ** then return NULL.
2814 : **
2815 : ** If called with allocateFlag<0, check to see if the thread specific
2816 : ** data is allocated and is all zero. If it is then deallocate it.
2817 : ** Return a pointer to the thread specific data or NULL if it is
2818 : ** unallocated or gets deallocated.
2819 : */
2820 3751 : ThreadData *sqlite3UnixThreadSpecificData(int allocateFlag){
2821 : static const ThreadData zeroData = {0}; /* Initializer to silence warnings
2822 : ** from broken compilers */
2823 : #ifdef SQLITE_UNIX_THREADS
2824 : static pthread_key_t key;
2825 : static int keyInit = 0;
2826 : ThreadData *pTsd;
2827 :
2828 : if( !keyInit ){
2829 : sqlite3OsEnterMutex();
2830 : if( !keyInit ){
2831 : int rc;
2832 : rc = pthread_key_create(&key, 0);
2833 : if( rc ){
2834 : sqlite3OsLeaveMutex();
2835 : return 0;
2836 : }
2837 : keyInit = 1;
2838 : }
2839 : sqlite3OsLeaveMutex();
2840 : }
2841 :
2842 : pTsd = pthread_getspecific(key);
2843 : if( allocateFlag>0 ){
2844 : if( pTsd==0 ){
2845 : if( !sqlite3TestMallocFail() ){
2846 : pTsd = sqlite3OsMalloc(sizeof(zeroData));
2847 : }
2848 : #ifdef SQLITE_MEMDEBUG
2849 : sqlite3_isFail = 0;
2850 : #endif
2851 : if( pTsd ){
2852 : *pTsd = zeroData;
2853 : pthread_setspecific(key, pTsd);
2854 : TSD_COUNTER(+1);
2855 : }
2856 : }
2857 : }else if( pTsd!=0 && allocateFlag<0
2858 : && memcmp(pTsd, &zeroData, sizeof(ThreadData))==0 ){
2859 : sqlite3OsFree(pTsd);
2860 : pthread_setspecific(key, 0);
2861 : TSD_COUNTER(-1);
2862 : pTsd = 0;
2863 : }
2864 : return pTsd;
2865 : #else
2866 : static ThreadData *pTsd = 0;
2867 3751 : if( allocateFlag>0 ){
2868 0 : if( pTsd==0 ){
2869 : if( !sqlite3TestMallocFail() ){
2870 0 : pTsd = sqlite3OsMalloc( sizeof(zeroData) );
2871 : }
2872 : #ifdef SQLITE_MEMDEBUG
2873 : sqlite3_isFail = 0;
2874 : #endif
2875 0 : if( pTsd ){
2876 0 : *pTsd = zeroData;
2877 : TSD_COUNTER(+1);
2878 : }
2879 : }
2880 3751 : }else if( pTsd!=0 && allocateFlag<0
2881 : && memcmp(pTsd, &zeroData, sizeof(ThreadData))==0 ){
2882 0 : sqlite3OsFree(pTsd);
2883 : TSD_COUNTER(-1);
2884 0 : pTsd = 0;
2885 : }
2886 3751 : return pTsd;
2887 : #endif
2888 : }
2889 :
2890 : /*
2891 : ** The following variable, if set to a non-zero value, becomes the result
2892 : ** returned from sqlite3OsCurrentTime(). This is used for testing.
2893 : */
#ifdef SQLITE_TEST
int sqlite3_current_time = 0;
#endif

/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.  Return 1 if the time and date cannot be found, in which
** case *prNow is left unchanged.
**
** 2440587.5 is the Julian Day number of the Unix epoch
** (1970-01-01 00:00:00 UTC) and 86400 is the number of seconds in a day.
**
** In test builds a non-zero sqlite3_current_time, taken as seconds since
** the Unix epoch, is used in place of the system clock.
*/
int sqlite3UnixCurrentTime(double *prNow){
#ifdef NO_GETTOD
  time_t t;
#else
  struct timeval sNow;
#endif

#ifdef SQLITE_TEST
  if( sqlite3_current_time ){
    *prNow = sqlite3_current_time/86400.0 + 2440587.5;
    return 0;
  }
#endif

#ifdef NO_GETTOD
  if( time(&t)==(time_t)-1 ){
    return 1;   /* The system clock is unavailable */
  }
  *prNow = t/86400.0 + 2440587.5;
#else
  if( gettimeofday(&sNow, 0)!=0 ){
    return 1;   /* The system clock is unavailable */
  }
  *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
#endif
  return 0;
}
2920 :
2921 : #endif /* OS_UNIX */
|