// Spaces: Running Running  (web-page residue from extraction; not part of the source)
#if defined(_WIN32)
// TODO: consider moving to llama-impl.h if needed in more places
// Returns the system message text for a Win32 error code (such as a GetLastError() value).
// Returns the literal "FormatMessageA failed" when no message could be produced.
static std::string llama_format_win_err(DWORD err) {
    LPSTR buf = NULL;
    // with FORMAT_MESSAGE_ALLOCATE_BUFFER the function allocates the buffer itself and stores its
    // address in `buf`, which is why the address of the pointer is passed (cast to LPSTR)
    size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
                                 NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL);
    if (!size) {
        return "FormatMessageA failed";
    }
    // copy the message out before releasing the system-allocated buffer
    std::string ret(buf, size);
    LocalFree(buf);
    return ret;
}
#endif
// llama_file

// Implementation behind llama_file: an open FILE* plus the file size measured at open time,
// with seek/tell/read/write helpers that throw std::runtime_error on failure.
// On Windows the I/O goes through the Win32 handle underlying the FILE*; on other platforms
// it goes through the C stdio functions. Exactly one of the two variants is compiled.
struct llama_file::impl {
#if defined(_WIN32)
    // Win32 handle behind `fp`, obtained with _get_osfhandle(); it is not closed separately,
    // the destructor only closes `fp`
    HANDLE fp_win32;

    // Returns the system message for `error_code`, or "Win32 error code: <hex>" when
    // FormatMessageA cannot produce one.
    std::string GetErrorMessageWin32(DWORD error_code) const {
        std::string ret;
        LPSTR lpMsgBuf = NULL;
        DWORD bufLen = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
                                      NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&lpMsgBuf, 0, NULL);
        if (!bufLen) {
            ret = format("Win32 error code: %lx", error_code);
        } else {
            ret = lpMsgBuf;
            LocalFree(lpMsgBuf);
        }

        return ret;
    }

    // Opens `fname` through ggml_fopen(fname, mode) and records the file size.
    // Throws std::runtime_error if the file cannot be opened or measured.
    impl(const char * fname, const char * mode) {
        fp = ggml_fopen(fname, mode);
        if (fp == NULL) {
            throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
        }
        fp_win32 = (HANDLE) _get_osfhandle(_fileno(fp));
        try {
            // measure the size by seeking to the end, then rewind
            seek(0, SEEK_END);
            size = tell();
            seek(0, SEEK_SET);
        } catch (...) {
            // the destructor does not run when the constructor throws, so close the file here
            std::fclose(fp);
            throw;
        }
    }

    // Returns the current file position in bytes.
    size_t tell() const {
        LARGE_INTEGER li;
        li.QuadPart = 0;
        // moving by 0 from the current position reports the position back in `li`
        BOOL ret = SetFilePointerEx(fp_win32, li, &li, FILE_CURRENT);
        if (!ret) {
            throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
        }

        return li.QuadPart;
    }

    // Moves the file position; `whence` is SEEK_SET, SEEK_CUR or SEEK_END.
    void seek(size_t offset, int whence) const {
        // `whence` is handed to SetFilePointerEx unchanged, which is only valid because the
        // stdio constants have the same values as the Win32 ones
        static_assert(SEEK_SET == FILE_BEGIN, "SEEK_SET != FILE_BEGIN");
        static_assert(SEEK_CUR == FILE_CURRENT, "SEEK_CUR != FILE_CURRENT");
        static_assert(SEEK_END == FILE_END, "SEEK_END != FILE_END");

        LARGE_INTEGER li;
        li.QuadPart = offset;
        BOOL ret = SetFilePointerEx(fp_win32, li, NULL, whence);
        if (!ret) {
            throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
        }
    }

    // Reads exactly `len` bytes into `ptr`; throws if the read fails or the file ends early.
    void read_raw(void * ptr, size_t len) const {
        size_t bytes_read = 0;
        while (bytes_read < len) {
            // ReadFile takes a DWORD length, so the request is issued in chunks of at most 64 MiB
            size_t chunk_size = std::min<size_t>(len - bytes_read, 64*1024*1024);
            DWORD chunk_read = 0;
            BOOL result = ReadFile(fp_win32, reinterpret_cast<char*>(ptr) + bytes_read, chunk_size, &chunk_read, NULL);
            if (!result) {
                throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
            }
            if (chunk_read < chunk_size || chunk_read == 0) {
                throw std::runtime_error("unexpectedly reached end of file");
            }

            bytes_read += chunk_read;
        }
    }

    // Reads one uint32_t in the file's raw byte order (no endianness conversion is applied).
    uint32_t read_u32() const {
        uint32_t val;
        read_raw(&val, sizeof(val));
        return val;
    }

    // Writes exactly `len` bytes from `ptr`; throws if the write fails or is short.
    void write_raw(const void * ptr, size_t len) const {
        size_t bytes_written = 0;
        while (bytes_written < len) {
            // WriteFile takes a DWORD length, so the request is issued in chunks of at most 64 MiB
            size_t chunk_size = std::min<size_t>(len - bytes_written, 64*1024*1024);
            DWORD chunk_written = 0;
            BOOL result = WriteFile(fp_win32, reinterpret_cast<char const*>(ptr) + bytes_written, chunk_size, &chunk_written, NULL);
            if (!result) {
                throw std::runtime_error(format("write error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
            }
            if (chunk_written < chunk_size || chunk_written == 0) {
                throw std::runtime_error("unexpectedly failed to write bytes");
            }

            bytes_written += chunk_written;
        }
    }

    // Writes one uint32_t as raw bytes (no endianness conversion is applied).
    void write_u32(uint32_t val) const {
        write_raw(&val, sizeof(val));
    }

    ~impl() {
        if (fp) {
            std::fclose(fp);
        }
    }
#else
    // Opens `fname` through ggml_fopen(fname, mode) and records the file size.
    // Throws std::runtime_error if the file cannot be opened or measured.
    impl(const char * fname, const char * mode) {
        fp = ggml_fopen(fname, mode);
        if (fp == NULL) {
            throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
        }
        try {
            // measure the size by seeking to the end, then rewind
            seek(0, SEEK_END);
            size = tell();
            seek(0, SEEK_SET);
        } catch (...) {
            // the destructor does not run when the constructor throws, so close the file here
            std::fclose(fp);
            throw;
        }
    }

    // Returns the current file position in bytes.
    size_t tell() const {
// TODO: this ifdef is never true?
// (it sits inside the non-Windows branch of this struct, so the std::ftell path is the one compiled)
#ifdef _WIN32
        __int64 ret = _ftelli64(fp);
#else
        long ret = std::ftell(fp);
#endif
        if (ret == -1) {
            throw std::runtime_error(format("ftell error: %s", strerror(errno)));
        }

        return (size_t) ret;
    }

    // Moves the file position; `whence` is SEEK_SET, SEEK_CUR or SEEK_END.
    void seek(size_t offset, int whence) const {
// TODO: this ifdef is never true?
// (it sits inside the non-Windows branch of this struct, so the std::fseek path is the one compiled)
#ifdef _WIN32
        int ret = _fseeki64(fp, (__int64) offset, whence);
#else
        int ret = std::fseek(fp, (long) offset, whence);
#endif
        if (ret != 0) {
            throw std::runtime_error(format("seek error: %s", strerror(errno)));
        }
    }

    // Reads exactly `len` bytes into `ptr`; a zero-length read is a no-op.
    // Throws if the stream reports an error or the file ends early.
    void read_raw(void * ptr, size_t len) const {
        if (len == 0) {
            return;
        }
        errno = 0;
        // one item of size `len`: the call returns 1 only if every byte was read
        std::size_t ret = std::fread(ptr, len, 1, fp);
        if (ferror(fp)) {
            throw std::runtime_error(format("read error: %s", strerror(errno)));
        }
        if (ret != 1) {
            throw std::runtime_error("unexpectedly reached end of file");
        }
    }

    // Reads one uint32_t in the file's raw byte order (no endianness conversion is applied).
    uint32_t read_u32() const {
        uint32_t ret;
        read_raw(&ret, sizeof(ret));
        return ret;
    }

    // Writes exactly `len` bytes from `ptr`; a zero-length write is a no-op.
    void write_raw(const void * ptr, size_t len) const {
        if (len == 0) {
            return;
        }
        errno = 0;
        // one item of size `len`: the call returns 1 only if every byte was written
        size_t ret = std::fwrite(ptr, len, 1, fp);
        if (ret != 1) {
            throw std::runtime_error(format("write error: %s", strerror(errno)));
        }
    }

    // Writes one uint32_t as raw bytes (no endianness conversion is applied).
    void write_u32(uint32_t val) const {
        write_raw(&val, sizeof(val));
    }

    ~impl() {
        if (fp) {
            std::fclose(fp);
        }
    }
#endif

    FILE * fp;   // stream returned by ggml_fopen; closed by the destructor
    size_t size; // file size in bytes, measured once in the constructor
};
llama_file::llama_file(const char * fname, const char * mode) : pimpl(std::make_unique<impl>(fname, mode)) {} | |
llama_file::~llama_file() = default; | |
size_t llama_file::tell() const { return pimpl->tell(); } | |
size_t llama_file::size() const { return pimpl->size; } | |
int llama_file::file_id() const { | |
return _fileno(pimpl->fp); | |
return fileno(pimpl->fp); | |
return ::fileno(pimpl->fp); | |
} | |
void llama_file::seek(size_t offset, int whence) const { pimpl->seek(offset, whence); } | |
void llama_file::read_raw(void * ptr, size_t len) const { pimpl->read_raw(ptr, len); } | |
uint32_t llama_file::read_u32() const { return pimpl->read_u32(); } | |
void llama_file::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); } | |
void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); } | |
// llama_mmap

// Implementation behind llama_mmap: a read-only mapping of a whole llama_file.
// Three variants exist and exactly one is compiled: POSIX mmap, Win32 file mapping, and a
// stub that throws on platforms with neither.
struct llama_mmap::impl {
#ifdef _POSIX_MAPPED_FILES
    // byte ranges [first, second) of the file that are still mapped, relative to `addr`
    std::vector<std::pair<size_t, size_t>> mapped_fragments;

    // Maps the whole of `file` read-only.
    //   prefetch - number of leading bytes to ask the kernel to load eagerly (0 disables)
    //   numa     - disables prefetching and requests random-access paging instead
    // Throws std::runtime_error if mmap fails; advisory-call failures only log a warning.
    impl(struct llama_file * file, size_t prefetch, bool numa) {
        size = file->size();
        int fd = file->file_id();
        int flags = MAP_SHARED;
        if (numa) { prefetch = 0; }
#ifdef __linux__
        // posix_fadvise reports failure through its return value and does not set errno
        const int fadvise_err = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
        if (fadvise_err) {
            LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
                    strerror(fadvise_err));
        }
        if (prefetch) { flags |= MAP_POPULATE; }
#endif
        addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
        if (addr == MAP_FAILED) {
            throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
        }

        if (prefetch > 0) {
            // posix_madvise also returns the error number instead of setting errno
            const int madvise_err = posix_madvise(addr, std::min(file->size(), prefetch), POSIX_MADV_WILLNEED);
            if (madvise_err) {
                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
                        strerror(madvise_err));
            }
        }
        if (numa) {
            const int madvise_err = posix_madvise(addr, file->size(), POSIX_MADV_RANDOM);
            if (madvise_err) {
                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
                        strerror(madvise_err));
            }
        }

        // initially the whole file is mapped as a single fragment
        mapped_fragments.emplace_back(0, file->size());
    }

    // Shrinks [*first, *last) inward to page boundaries: *first is rounded up and *last is
    // rounded down. If nothing remains, *last is set equal to *first (an empty range).
    // `page_size` must be a power of two for the mask arithmetic to be valid.
    static void align_range(size_t * first, size_t * last, size_t page_size) {
        size_t offset_in_page = *first & (page_size - 1);
        size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
        *first += offset_to_page;

        *last = *last & ~(page_size - 1);

        if (*last <= *first) {
            *last = *first;
        }
    }

    // Unmaps the whole pages lying inside [first, last) and updates `mapped_fragments`
    // so the destructor does not unmap them again. Does nothing if no whole page fits.
    void unmap_fragment(size_t first, size_t last) {
        int page_size = sysconf(_SC_PAGESIZE);
        align_range(&first, &last, page_size);
        size_t len = last - first;

        if (len == 0) {
            return;
        }

        GGML_ASSERT(first % page_size == 0);
        GGML_ASSERT(last % page_size == 0);
        GGML_ASSERT(last > first);

        void * next_page_start = (uint8_t *) addr + first;

        if (munmap(next_page_start, len)) {
            LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
        }

        // rebuild the fragment list with [first, last) cut out of every fragment it touches
        std::vector<std::pair<size_t, size_t>> new_mapped_fragments;
        for (const auto & frag : mapped_fragments) {
            if (frag.first < first && frag.second > last) {
                // the unmapped range is strictly inside the fragment: split it in two
                new_mapped_fragments.emplace_back(frag.first, first);
                new_mapped_fragments.emplace_back(last, frag.second);
            } else if (frag.first < first && frag.second > first) {
                // the range covers the tail of the fragment
                new_mapped_fragments.emplace_back(frag.first, first);
            } else if (frag.first < last && frag.second > last) {
                // the range covers the head of the fragment
                new_mapped_fragments.emplace_back(last, frag.second);
            } else if (frag.first >= first && frag.second <= last) {
                // the fragment lies entirely inside the range: drop it
            } else {
                // no overlap: keep the fragment unchanged
                new_mapped_fragments.push_back(frag);
            }
        }
        mapped_fragments = std::move(new_mapped_fragments);
    }

    // Unmaps every fragment that is still mapped.
    ~impl() {
        for (const auto & frag : mapped_fragments) {
            if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
                LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
            }
        }
    }
#elif defined(_WIN32)
    // Maps the whole of `file` read-only; `numa` is ignored in this variant.
    // Throws std::runtime_error if the mapping or the view cannot be created.
    impl(struct llama_file * file, size_t prefetch, bool numa) {
        GGML_UNUSED(numa);

        size = file->size();

        HANDLE hFile = (HANDLE) _get_osfhandle(file->file_id());

        HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);

        if (hMapping == NULL) {
            DWORD error = GetLastError();
            throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
        }

        addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
        // read the error code before CloseHandle can overwrite it
        DWORD error = GetLastError();
        CloseHandle(hMapping);

        if (addr == NULL) {
            throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
        }

        if (prefetch > 0) {
#if _WIN32_WINNT >= 0x602
            // PrefetchVirtualMemory exists only on Windows 8 and later, so it is looked up at
            // run time instead of being linked directly
            BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
            HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");

            pPrefetchVirtualMemory = (decltype(pPrefetchVirtualMemory))(void *) GetProcAddress(hKernel32, "PrefetchVirtualMemory");

            if (pPrefetchVirtualMemory) {
                // advise the kernel to preload the first min(size, prefetch) bytes of the mapping
                WIN32_MEMORY_RANGE_ENTRY range;
                range.VirtualAddress = addr;
                range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
                if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
                    LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
                            llama_format_win_err(GetLastError()).c_str());
                }
            }
#else
            // only reached when building against headers that predate PrefetchVirtualMemory
            throw std::runtime_error("PrefetchVirtualMemory unavailable");
#endif
        }
    }

    // Partial unmapping is not implemented in this variant; the call is a no-op.
    void unmap_fragment(size_t first, size_t last) {
        GGML_UNUSED(first);
        GGML_UNUSED(last);
    }

    ~impl() {
        if (!UnmapViewOfFile(addr)) {
            LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
                    llama_format_win_err(GetLastError()).c_str());
        }
    }
#else
    // Stub for platforms without memory-mapped files: construction always throws.
    impl(struct llama_file * file, size_t prefetch, bool numa) {
        GGML_UNUSED(file);
        GGML_UNUSED(prefetch);
        GGML_UNUSED(numa);

        throw std::runtime_error("mmap not supported");
    }

    void unmap_fragment(size_t first, size_t last) {
        GGML_UNUSED(first);
        GGML_UNUSED(last);

        throw std::runtime_error("mmap not supported");
    }
#endif

    void * addr; // base address of the mapping
    size_t size; // size of the mapped file in bytes
};
llama_mmap::llama_mmap(struct llama_file * file, size_t prefetch, bool numa) : pimpl(std::make_unique<impl>(file, prefetch, numa)) {} | |
llama_mmap::~llama_mmap() = default; | |
size_t llama_mmap::size() const { return pimpl->size; } | |
void * llama_mmap::addr() const { return pimpl->addr; } | |
void llama_mmap::unmap_fragment(size_t first, size_t last) { pimpl->unmap_fragment(first, last); } | |
// True when this build has a working mapping implementation (POSIX mapped files or Win32).
#if defined(_POSIX_MAPPED_FILES) || defined(_WIN32)
const bool llama_mmap::SUPPORTED = true;
#else
const bool llama_mmap::SUPPORTED = false;
#endif
// llama_mlock

// Implementation behind llama_mlock: keeps a growing prefix of a buffer locked in physical
// memory. Three variants of the low-level primitives exist and exactly one is compiled:
// POSIX mlock, Win32 VirtualLock, and a stub for platforms with neither.
struct llama_mlock::impl {
#ifdef _POSIX_MEMLOCK_RANGE
    // Locking is done in units of the system page size.
    static size_t lock_granularity() {
        return (size_t) sysconf(_SC_PAGESIZE);
    }

    // Locks [addr, addr + size). Returns false and logs a warning on failure.
    // MLOCK_SUGGESTION is not defined in this struct and must be provided before this point.
    bool raw_lock(const void * addr, size_t size) const {
        if (!mlock(addr, size)) {
            return true;
        }

        // capture errno once so later library calls cannot disturb the value being tested
        const int err = errno;
        char * errmsg = std::strerror(err);
        bool suggest = (err == ENOMEM);

        // only print the suggestion when the memlock resource limit can be read and passes
        // the check below
        struct rlimit lock_limit;
        if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) {
            suggest = false;
        }
        if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size)) {
            suggest = false;
        }

        LLAMA_LOG_WARN("warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
                size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
        return false;
    }

    // Unlocks [addr, addr + size); a failure is only logged.
    static void raw_unlock(void * addr, size_t size) {
        if (munlock(addr, size)) {
            LLAMA_LOG_WARN("warning: failed to munlock buffer: %s\n", std::strerror(errno));
        }
    }
#elif defined(_WIN32)
    // Locking is done in units of the system page size.
    static size_t lock_granularity() {
        SYSTEM_INFO si;
        GetSystemInfo(&si);
        return (size_t) si.dwPageSize;
    }

    // Locks [ptr, ptr + len). If the first VirtualLock fails, the process working set is
    // enlarged and the lock is retried once. Returns false and logs a warning on failure.
    bool raw_lock(void * ptr, size_t len) const {
        for (int tries = 1; ; tries++) {
            if (VirtualLock(ptr, len)) {
                return true;
            }
            if (tries == 2) {
                LLAMA_LOG_WARN("warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
                    len, size, llama_format_win_err(GetLastError()).c_str());
                return false;
            }

            // first attempt failed: grow the working set limits and try again
            SIZE_T min_ws_size, max_ws_size;
            if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) {
                LLAMA_LOG_WARN("warning: GetProcessWorkingSetSize failed: %s\n",
                        llama_format_win_err(GetLastError()).c_str());
                return false;
            }
            // raise both limits by the requested length plus 1 MiB of slack
            size_t increment = len + 1048576;
            min_ws_size += increment;
            max_ws_size += increment;
            if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) {
                LLAMA_LOG_WARN("warning: SetProcessWorkingSetSize failed: %s\n",
                        llama_format_win_err(GetLastError()).c_str());
                return false;
            }
        }
    }

    // Unlocks [ptr, ptr + len); a failure is only logged.
    static void raw_unlock(void * ptr, size_t len) {
        if (!VirtualUnlock(ptr, len)) {
            LLAMA_LOG_WARN("warning: failed to VirtualUnlock buffer: %s\n",
                    llama_format_win_err(GetLastError()).c_str());
        }
    }
#else
    // Stub for platforms without memory locking: locking always fails with a warning.
    static size_t lock_granularity() {
        return (size_t) 65536;
    }

    bool raw_lock(const void * addr, size_t len) const {
        LLAMA_LOG_WARN("warning: mlock not supported on this system\n");
        return false;
    }

    static void raw_unlock(const void * addr, size_t len) {}
#endif

    impl() : addr(NULL), size(0), failed_already(false) {}

    // Releases whatever prefix of the buffer was successfully locked.
    ~impl() {
        if (size) {
            raw_unlock(addr, size);
        }
    }

    // Sets the base address of the buffer to lock; may only be called once, before any growth.
    void init(void * ptr) {
        GGML_ASSERT(addr == NULL && size == 0);
        addr = ptr;
    }

    // Extends the locked prefix to at least `target_size` bytes, rounded up to the lock
    // granularity. After the first failure every later call is a no-op.
    void grow_to(size_t target_size) {
        GGML_ASSERT(addr);
        if (failed_already) {
            return;
        }
        size_t granularity = lock_granularity();
        // round up to a multiple of the granularity (the mask requires a power of two)
        target_size = (target_size + granularity - 1) & ~(granularity - 1);
        if (target_size > size) {
            // only the not-yet-locked tail is locked
            if (raw_lock((uint8_t *) addr + size, target_size - size)) {
                size = target_size;
            } else {
                failed_already = true;
            }
        }
    }

    void * addr;         // base of the buffer being locked
    size_t size;         // number of bytes currently locked, starting at `addr`
    bool failed_already; // set after the first failed lock attempt
};
llama_mlock::llama_mlock() : pimpl(std::make_unique<impl>()) {} | |
llama_mlock::~llama_mlock() = default; | |
void llama_mlock::init(void * ptr) { pimpl->init(ptr); } | |
void llama_mlock::grow_to(size_t target_size) { pimpl->grow_to(target_size); } | |
// True when this build has a working locking implementation (POSIX mlock or Win32).
#if defined(_POSIX_MEMLOCK_RANGE) || defined(_WIN32)
const bool llama_mlock::SUPPORTED = true;
#else
const bool llama_mlock::SUPPORTED = false;
#endif
// Returns the platform's PATH_MAX value.
size_t llama_path_max() {
    return PATH_MAX;
}