Go对文件的操作主要使用os包。
文件操作中最常用的数据类型是string和[]byte。
string是8位字节的集合,通常(但不一定)代表UTF-8编码的文本。string可以为空,但不能为nil;string的值是不能改变的。
string的底层数据结构就是byte数组:
// stringStruct describes a string value as a data pointer plus a length;
// gostringnocopy reinterprets a stringStruct directly as a string.
type stringStruct struct {
str unsafe.Pointer // pointer to the first byte of the string data
len int // length of the string
}
// gostringnocopy builds a string whose data pointer is str itself (a pointer
// to a byte) and whose length is whatever findnull reports for str. The
// stringStruct header is reinterpreted as a string, so no bytes are copied.
func gostringnocopy(str *byte) string {
	hdr := stringStruct{
		str: unsafe.Pointer(str),
		len: findnull(str),
	}
	return *(*string)(unsafe.Pointer(&hdr))
}
对于[]byte
来说,以下操作是可行的:
b := []byte("Hello Gopher!")
b[1] = 'T' // allowed: slice elements can be assigned, so b now holds "HTllo Gopher!"
而对于string,修改操作是被禁止的:
s := "Hello Gopher!"
s[1] = 'T' // does not compile: the bytes of a string cannot be assigned to
而string能支持这样的操作:
s := "Hello Gopher!"
s = "Tello Gopher!" // allowed: the variable s is rebound to a different string value
字符串的值不能被更改,但可以被替换。string在底层都是结构体stringStruct{str: str_point, len: str_len},结构体的str指针指向的是一个字符串常量的地址。这个地址里面的内容是不可以被改变的,因为它是只读的,但是这个指针可以指向不同的地址。
那么,以下操作的含义是不同的:
s := "S1" // s's str pointer refers to the storage holding "S1"
s = "S2" // s's str pointer is redirected to the storage holding "S2"; "S1" itself is untouched
b := []byte{1} // allocate a backing array holding the byte 1; b's array pointer refers to it
b[0] = 2 // overwrite the element in place: same backing array, its content is now 2
string
不可修改,意味它是只读属性,这样的好处就是:在并发场景下,我们可以在不加锁的控制下,多次使用同一字符串,在保证高效共享的情况下而不用担心安全问题。
两者可以相互转换
// string to []byte: the conversion copies the bytes into a new slice
s1 := "hello"
b := []byte(s1)
// []byte to string: the conversion copies the bytes into a new string
s2 := string(b)
OpenFile
是最全面的文件操作函数, 其他操作函数大多基于OpenFile
// OpenFile opens the named file using the given flag bits (O_RDONLY,
// O_CREATE, ...); perm is the permission mode passed along for file creation.
func OpenFile(name string, flag int, perm FileMode) (*File, error)
OpenFile
根据特定的flag对文件进行操作
O_RDONLY
, O_WRONLY
, or O_RDWR
必须确定其一
// Flags accepted by OpenFile. Each one is an int constant taken from the
// corresponding syscall value.
const (
// Exactly one of O_RDONLY, O_WRONLY, or O_RDWR must be specified.
O_RDONLY int = syscall.O_RDONLY // open the file read-only.
O_WRONLY int = syscall.O_WRONLY // open the file write-only.
O_RDWR int = syscall.O_RDWR // open the file read-write.
// The remaining values may be or'ed in to control behavior.
O_APPEND int = syscall.O_APPEND // append data to the file when writing.
O_CREATE int = syscall.O_CREAT // create a new file if none exists.
O_EXCL int = syscall.O_EXCL // used with O_CREATE, file must not exist.
O_SYNC int = syscall.O_SYNC // open for synchronous I/O.
O_TRUNC int = syscall.O_TRUNC // truncate regular writable file when opened.
)
package main
import (
"log"
"os"
)
// main opens notes.txt for reading and writing, creating it when it does
// not exist, and then closes it. Any failure terminates the program.
func main() {
	// 0644 (rw-r--r--): a plain text file has no reason to carry execute bits.
	f, err := os.OpenFile("notes.txt", os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		log.Fatal(err)
	}
	if err := f.Close(); err != nil {
		log.Fatal(err)
	}
}
package main
import (
"log"
"os"
)
// main appends one line to access.log, creating the file first when it is
// missing. Any failure terminates the program.
func main() {
	// Write-only, append mode, create on demand.
	const flags = os.O_APPEND | os.O_CREATE | os.O_WRONLY
	logFile, openErr := os.OpenFile("access.log", flags, 0644)
	if openErr != nil {
		log.Fatal(openErr)
	}

	_, writeErr := logFile.Write([]byte("appended some data\n"))
	if writeErr != nil {
		logFile.Close() // ignore error; Write error takes precedence
		log.Fatal(writeErr)
	}

	closeErr := logFile.Close()
	if closeErr != nil {
		log.Fatal(closeErr)
	}
}
文件不存在 创建文件
文件存在 则截断(覆盖旧文件)
// Create opens the named file for reading and writing. A missing file is
// created with mode 0666 (before umask); an existing file is truncated.
// On success the returned File can be used for I/O and its descriptor has
// mode O_RDWR. If there is an error, it will be of type *PathError.
func Create(name string) (*File, error) {
	flags := O_RDWR | O_CREATE | O_TRUNC
	return OpenFile(name, flags, 0666)
}
创建临时文件
// CreateTemp creates a new temporary file in the directory dir, opens it
// for reading and writing, and returns the resulting file.
//
// The file name is pattern with a random string appended; when pattern
// contains a "*", the random string replaces the last "*" instead. An empty
// dir selects the default temporary directory returned by TempDir.
//
// Simultaneous callers (programs or goroutines) never receive the same file.
// The path is available through the file's Name method, and the caller is
// responsible for removing the file once it is no longer needed.
func CreateTemp(dir, pattern string) (*File, error) {
	if dir == "" {
		dir = TempDir()
	}

	prefix, suffix, err := prefixAndSuffix(pattern)
	if err != nil {
		return nil, &PathError{Op: "createtemp", Path: pattern, Err: err}
	}
	prefix = joinPath(dir, prefix)

	// O_EXCL makes the open fail when the candidate name already exists, so
	// a name collision simply leads to another attempt, up to 10000 in total.
	for attempt := 1; ; attempt++ {
		f, err := OpenFile(prefix+nextRandom()+suffix, O_RDWR|O_CREATE|O_EXCL, 0600)
		if !IsExist(err) {
			return f, err
		}
		if attempt >= 10000 {
			return nil, &PathError{Op: "createtemp", Path: prefix + "*" + suffix, Err: ErrExist}
		}
	}
}
CreateTemp打开文件时带有O_EXCL,确保不会复用已经存在的文件。
临时文件不会被自动删除,使用完毕后需要调用方显式移除。
package main
import (
"log"
"os"
)
// main writes a small temporary file and reports any failure.
//
// The work lives in writeTempFile so that its deferred os.Remove always
// runs: log.Fatal exits the process immediately, and deferred calls in the
// function that invokes it would be skipped.
func main() {
	if err := writeTempFile(); err != nil {
		log.Fatal(err)
	}
}

// writeTempFile creates a temporary file in the default temporary directory,
// writes "content" to it, closes it, and removes it before returning.
// It returns the first error encountered.
func writeTempFile() error {
	f, err := os.CreateTemp("", "example")
	if err != nil {
		return err
	}
	defer os.Remove(f.Name()) // clean up

	if _, err := f.Write([]byte("content")); err != nil {
		f.Close() // ignore error; Write error takes precedence
		return err
	}
	return f.Close()
}
打开文件 只读
文件不存在则报错
// Open opens the named file read-only. On success the returned file can be
// used for reading and its descriptor has mode O_RDONLY.
// If there is an error, it will be of type *PathError.
func Open(name string) (*File, error) {
	f, err := OpenFile(name, O_RDONLY, 0)
	return f, err
}
读取整个文件
// ReadFile reads the named file and returns the contents.
// A successful call returns err == nil, not err == EOF.
// Because ReadFile reads the whole file, it does not treat an EOF from Read
// as an error to be reported.
func ReadFile(name string) ([]byte, error) {
f, err := Open(name)
if err != nil {
return nil, err
}
defer f.Close()
// Use the size reported by Stat as a capacity hint. A Stat failure is not
// fatal: size simply stays 0.
var size int
if info, err := f.Stat(); err == nil {
size64 := info.Size()
// Accept the size only if it survives the round trip through int.
if int64(int(size64)) == size64 {
size = int(size64)
}
}
size++ // one byte for final read at EOF
// If a file claims a small size, read at least 512 bytes.
// In particular, files in Linux's /proc claim size 0 but
// then do not work right if read in small pieces,
// so an initial read of 1 byte would not work correctly.
if size < 512 {
size = 512
}
data := make([]byte, 0, size)
for {
// Buffer full: append one element to the full-capacity slice so that
// append allocates a larger backing array, then restore the original
// length so the extra zero byte is not part of the data.
if len(data) >= cap(data) {
d := append(data[:cap(data)], 0)
data = d[:len(data)]
}
// Read into the unused capacity and extend data by the bytes received.
n, err := f.Read(data[len(data):cap(data)])
data = data[:len(data)+n]
if err != nil {
// io.EOF marks normal completion; any other error is returned
// together with the data read so far.
if err == io.EOF {
err = nil
}
return data, err
}
}
}
使用:
package main
import (
"log"
"os"
)
// main copies the contents of testdata/hello to standard output and
// terminates the program if the file cannot be read.
func main() {
	contents, readErr := os.ReadFile("testdata/hello")
	if readErr != nil {
		log.Fatal(readErr)
	}
	os.Stdout.Write(contents)
}
将data写入文件 截断模式
// WriteFile writes data to the named file. A missing file is created with
// permissions perm (before umask); an existing file is truncated before
// writing and keeps its permissions.
//
// The file is always closed. A write error takes precedence over a close
// error; the close error is returned only when the write succeeded.
func WriteFile(name string, data []byte, perm FileMode) error {
	f, openErr := OpenFile(name, O_WRONLY|O_CREATE|O_TRUNC, perm)
	if openErr != nil {
		return openErr
	}

	_, writeErr := f.Write(data)
	closeErr := f.Close()
	if writeErr != nil {
		return writeErr
	}
	return closeErr
}
使用:
package main
import (
"log"
"os"
)
// main writes a greeting to testdata/hello and terminates the program if
// the write fails.
func main() {
	if err := os.WriteFile("testdata/hello", []byte("Hello, Gophers!"), 0666); err != nil {
		log.Fatal(err)
	}
}
从文件读取最多len(b)个bytes存储于b,返回读取的bytes数和error
读到文件末尾时返回0, io.EOF
// Read fills b with up to len(b) bytes from the File and reports how many
// bytes were read along with any error encountered.
// At end of file, Read returns 0, io.EOF.
func (f *File) Read(b []byte) (n int, err error) {
	if invalid := f.checkValid("read"); invalid != nil {
		return 0, invalid
	}
	count, readErr := f.read(b)
	return count, f.wrapErr("read", readErr)
}
从off开始读
// ReadAt reads len(b) bytes from the File starting at byte offset off and
// returns the number of bytes read and the error, if any.
// The error is always non-nil when n < len(b); at end of file it is io.EOF.
// A negative offset is rejected with a *PathError.
func (f *File) ReadAt(b []byte, off int64) (n int, err error) {
	if err := f.checkValid("read"); err != nil {
		return 0, err
	}
	if off < 0 {
		return 0, &PathError{Op: "readat", Path: f.name, Err: errors.New("negative offset")}
	}

	// Keep issuing positional reads until the buffer is full or one fails.
	remaining, pos := b, off
	for len(remaining) > 0 {
		got, readErr := f.pread(remaining, pos)
		if readErr != nil {
			return n, f.wrapErr("read", readErr)
		}
		n += got
		remaining = remaining[got:]
		pos += int64(got)
	}
	return n, nil
}
为读或者写设置偏移量offset 返回新的offset
whence参数:
0 相对于文件开头
1 相对于目前位置的offset
2 相对于文件末尾
// Seek positions the file for the next Read or Write. offset is interpreted
// according to whence: 0 is relative to the origin of the file, 1 to the
// current offset, and 2 to the end. It returns the new offset and an error,
// if any. The behavior of Seek on a file opened with O_APPEND is not
// specified.
//
// If f is a directory, the behavior of Seek varies by operating system; you
// can seek to the beginning of the directory on Unix-like operating systems,
// but not on Windows.
func (f *File) Seek(offset int64, whence int) (ret int64, err error) {
	if invalid := f.checkValid("seek"); invalid != nil {
		return 0, invalid
	}

	pos, seekErr := f.seek(offset, whence)
	switch {
	case seekErr != nil:
		return 0, f.wrapErr("seek", seekErr)
	case f.dirinfo != nil && pos != 0:
		// With directory state present, only a resulting offset of 0 is accepted.
		return 0, f.wrapErr("seek", syscall.EISDIR)
	}
	return pos, nil
}
// Write writes len(b) bytes from b to the File.
// It returns the number of bytes written and an error, if any.
// Write returns a non-nil error when n != len(b).
func (f *File) Write(b []byte) (n int, err error) {
if err := f.checkValid("write"); err != nil {
return 0, err
}
n, e := f.write(b)
// Never report a negative byte count to the caller.
if n < 0 {
n = 0
}
// A short write is an error in its own right, even when e is nil.
if n != len(b) {
err = io.ErrShortWrite
}
// NOTE(review): epipecheck is defined elsewhere; presumably it reacts to a
// broken-pipe error on this file. Confirm against its definition.
epipecheck(f, e)
// An error from the underlying write replaces io.ErrShortWrite.
if e != nil {
err = f.wrapErr("write", e)
}
return n, err
}
// WriteAt writes len(b) bytes to the File starting at byte offset off and
// returns the number of bytes written and an error, if any.
// The error is non-nil whenever n != len(b).
//
// WriteAt returns an error when the file was opened with the O_APPEND flag,
// and rejects a negative offset with a *PathError.
func (f *File) WriteAt(b []byte, off int64) (n int, err error) {
	if err := f.checkValid("write"); err != nil {
		return 0, err
	}
	if f.appendMode {
		return 0, errWriteAtInAppendMode
	}
	if off < 0 {
		return 0, &PathError{Op: "writeat", Path: f.name, Err: errors.New("negative offset")}
	}

	// Keep issuing positional writes until everything is written or one fails.
	pending, pos := b, off
	for len(pending) > 0 {
		wrote, writeErr := f.pwrite(pending, pos)
		if writeErr != nil {
			return n, f.wrapErr("write", writeErr)
		}
		n += wrote
		pending = pending[wrote:]
		pos += int64(wrote)
	}
	return n, nil
}
// WriteString is like Write, but writes the contents of string s rather than
// a slice of bytes.
func (f *File) WriteString(s string) (n int, err error) {
// Build a []byte header by hand that points at s's data, so only the
// pointer and lengths are set and the string bytes are not copied.
var b []byte
hdr := (*unsafeheader.Slice)(unsafe.Pointer(&b))
hdr.Data = (*unsafeheader.String)(unsafe.Pointer(&s)).Data
hdr.Cap = len(s)
hdr.Len = len(s)
// b shares memory with the string s; it is only handed to Write here.
return f.Write(b)
}
关闭文件
// Close closes the File, rendering it unusable for I/O.
// On files that support SetDeadline, any pending I/O operations will be
// canceled and return immediately with an ErrClosed error.
// Close returns an error if it has already been called, and ErrInvalid when
// the receiver is nil.
func (f *File) Close() error {
	if f != nil {
		return f.file.close()
	}
	return ErrInvalid
}
文件信息
// Stat returns the FileInfo describing file, or an error.
func (file *File) Stat() (FileInfo, error)
FileInfo:
// A FileInfo describes a file and is returned by Stat.
// (any, used by Sys, is the Go 1.18+ spelling of interface{}.)
type FileInfo interface {
Name() string // base name of the file
Size() int64 // length in bytes for regular files; system-dependent for others
Mode() FileMode // file mode bits
ModTime() time.Time // modification time
IsDir() bool // abbreviation for Mode().IsDir()
Sys() any // underlying data source (can return nil)
}
包bufio
提供buffered I/O
NewReader
返回一个具有默认大小(4096
)buffer的Reader
// NewReader wraps rd in a new Reader whose buffer has the default size
// (defaultBufSize).
func NewReader(rd io.Reader) *Reader {
	r := NewReaderSize(rd, defaultBufSize)
	return r
}
要修改默认大小可以使用NewReaderSize
方法
NewReader
接收 具有Read
方法的接口实现当做参数
// Reader is the interface with a single Read method. NewReader accepts any
// value that implements it.
type Reader interface {
// Read stores data in p and returns the number of bytes read and an error.
Read(p []byte) (n int, err error)
}
分块读取
// ReadFile prints the file at filepath to standard output in chunks of at
// most 8 bytes, one chunk per line, reading through a bufio.Reader.
//
// It returns nil once the end of the file is reached, or the first error
// from opening or reading the file.
func ReadFile(filepath string) error {
	f, err := os.Open(filepath)
	if err != nil {
		return err
	}
	defer f.Close()

	r := bufio.NewReader(f)
	chunk := make([]byte, 8)
	for {
		n, err := r.Read(chunk)
		// Consume the bytes that were read before looking at err: a Read may
		// return data together with an error.
		if n > 0 {
			// Only the first n bytes are fresh; the tail of chunk may still
			// hold data from an earlier, longer read.
			fmt.Println(string(chunk[:n]))
		}
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
	}
}
对于含有\n
换行符等的文件可以使用ReadString
或者ReadBytes
分割读取
// ReadFile prints the file at filepath to standard output line by line,
// splitting on '\n' with bufio.Reader.ReadString.
//
// Each piece is printed before the error is examined, so a final line
// without a trailing newline is still printed. It returns nil at end of
// file, or the first error from opening or reading the file.
func ReadFile(filepath string) error {
	file, openErr := os.Open(filepath)
	if openErr != nil {
		return openErr
	}
	defer file.Close()

	reader := bufio.NewReader(file)
	for {
		text, readErr := reader.ReadString('\n')
		fmt.Print(text)
		if readErr == io.EOF {
			return nil
		}
		if readErr != nil {
			return readErr
		}
	}
}
ReadBytes
方法类似 读取的是[]byte
- Buffered 返回当前buffer中可读取的字节数
- Discard 略过n个数据
- Peek 返回n个数据 但停留原位置
- ReadByte 读取一个byte
- ReadRune 读取一个rune
- ReadSlice 读取到分隔符为止
Scanner
为读取类似于文件的有特定换行符之类的数据提供更方便的方式
分割函数split function
:
// SplitFunc is the signature of the split function used to tokenize the
// input. The arguments are an initial substring of the remaining unprocessed
// data and a flag, atEOF, that reports whether the Reader has no more data
// to give. The return values are the number of bytes to advance the input
// and the next token to return to the user, if any, plus an error, if any.
//
// Scanning stops if the function returns an error, in which case some of
// the input may be discarded. If that error is ErrFinalToken, scanning
// stops with no error.
//
// Otherwise, the Scanner advances the input. If the token is not nil,
// the Scanner returns it to the user. If the token is nil, the
// Scanner reads more data and continues scanning; if there is no more
// data--if atEOF was true--the Scanner returns. If the data does not
// yet hold a complete token, for instance if it has no newline while
// scanning lines, a SplitFunc can return (0, nil, nil) to signal the
// Scanner to read more data into the slice and try again with a
// longer slice starting at the same point in the input.
//
// The function is never called with an empty data slice unless atEOF
// is true. If atEOF is true, however, data may be non-empty and,
// as always, holds unprocessed text.
//
// In short: advance is how many input bytes to consume, token is the next
// token (nil when there is none yet), and a non-nil err stops scanning.
type SplitFunc func(data []byte, atEOF bool) (advance int, token []byte, err error)
函数返回要前进的数字advance
读取的token
以及err
err
不为空时 停止Scanning
如果err为ErrFinalToken
读取完成 停止Scanning
如果token不为nil 向用户返回token
如果token为nil 分两种情况
- atEOF == false 可以返回(0, nil, nil)让Scanner继续读
- atEOF == true 读取完成 返回
默认split function
是ScanLines
可以看到默认的分隔符是\n
// ScanLines is a split function for a Scanner that yields one line of text
// per token with the trailing end-of-line marker removed. A returned line
// may be empty. The end-of-line marker is an optional carriage return
// followed by a mandatory newline, i.e. `\r?\n` in regular expression
// notation. The last non-empty line of input is returned even when it has
// no newline.
func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	switch nl := bytes.IndexByte(data, '\n'); {
	case atEOF && len(data) == 0:
		// Nothing left at all.
		return 0, nil, nil
	case nl >= 0:
		// A complete newline-terminated line is available.
		return nl + 1, dropCR(data[:nl]), nil
	case atEOF:
		// Final line without a terminator: hand it over as is.
		return len(data), dropCR(data), nil
	default:
		// No newline yet: ask for more data.
		return 0, nil, nil
	}
}
常用的split function
还有ScanWords
按word分割
// isSpace reports whether r is a Unicode white space character.
// The set is spelled out here to avoid a dependency on the unicode package;
// the implementation is checked for validity in the tests.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\n', '\v', '\f', '\r', // ASCII: \t through \r plus space
		'\u0085', '\u00A0', // the two Latin-1 oddballs
		'\u1680', '\u2028', '\u2029', '\u202f', '\u205f', '\u3000': // isolated high-valued ones
		return true
	}
	// The one contiguous high-valued range.
	return '\u2000' <= r && r <= '\u200a'
}
// ScanWords is a split function for a Scanner that yields each
// space-separated word of text with the surrounding spaces removed. It
// never returns an empty string. The definition of space is set by
// unicode.IsSpace.
func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Step over leading spaces, one rune at a time.
	begin := 0
	for begin < len(data) {
		r, size := utf8.DecodeRune(data[begin:])
		if !isSpace(r) {
			break
		}
		begin += size
	}

	// Walk forward to the first space: that is the end of the word.
	for pos := begin; pos < len(data); {
		r, size := utf8.DecodeRune(data[pos:])
		if isSpace(r) {
			return pos + size, data[begin:pos], nil
		}
		pos += size
	}

	// At EOF a non-empty unterminated word is still a word.
	if atEOF && begin < len(data) {
		return len(data), data[begin:], nil
	}

	// Otherwise consume the leading spaces and request more data.
	return begin, nil, nil
}
使用的时候先用Split设置分割函数,然后循环调用Scan,并用Text或者Bytes取出每个token;取出的token是不包括分隔符的:
package main
import (
"bufio"
"fmt"
"os"
"strings"
)
// main splits a fixed two-line input into words with bufio.ScanWords,
// prints every word back to back (the tokens carry no separators), and
// finally prints the number of words on its own line.
func main() {
	// An artificial input source.
	const input = "Now is the winter of our discontent,\nMade glorious summer by this sun of York.\n"

	words := bufio.NewScanner(strings.NewReader(input))
	// Tokenize by word instead of the default line splitting.
	words.Split(bufio.ScanWords)

	total := 0
	for ; words.Scan(); total++ {
		fmt.Print(words.Text())
	}
	if scanErr := words.Err(); scanErr != nil {
		fmt.Fprintln(os.Stderr, "reading input:", scanErr)
	}
	fmt.Printf("%d\n", total)
}
定制分割函数:
package main
import (
"bufio"
"fmt"
"os"
"strings"
)
// onComma is a split function that cuts the input at every ','.
//
// When data contains a comma, the bytes before the first one form the token
// and the comma itself is consumed. Without a comma it requests more data
// until atEOF is set; then the remainder is delivered as the final token,
// which may be empty. Returning bufio.ErrFinalToken with that token tells
// Scan there are no more tokens, without making Scan itself report an error.
func onComma(data []byte, atEOF bool) (advance int, token []byte, err error) {
	for pos, c := range data {
		if c == ',' {
			return pos + 1, data[:pos], nil
		}
	}
	if atEOF {
		return 0, data, bufio.ErrFinalToken
	}
	return 0, nil, nil
}
// main scans a comma-separated list with the onComma split function and
// prints every token quoted and followed by a space. Scanner errors are
// reported on standard error.
func main() {
	// Comma-separated list; last entry is empty.
	const input = "1,2,3,4,"

	fields := bufio.NewScanner(strings.NewReader(input))
	fields.Split(onComma)

	for fields.Scan() {
		fmt.Printf("%q ", fields.Text())
	}
	if scanErr := fields.Err(); scanErr != nil {
		fmt.Fprintln(os.Stderr, "reading input:", scanErr)
	}
}