Submitted By: Zack Winkles Date: 2004-02-04 Initial Package Version: 2.6.2 Upstream Status: submitted Origin: http://marc.theaimsgroup.com/?l=linux-kernel&m=107589464818859&w=2 Description: > Date: Wed, 4 Feb 2004 17:07:58 +0530 From: Maneesh Soni To: LKML Subject: [RFC/T 0/6] sysfs backing store (with symlink) Message-ID: <20040204113758.GA4234@in.ibm.com> Hi All, Please find the following patches for the sysfs backing store. This version also has support for putting symlinks on the backing store. Earlier it had support for text/binary attribute files. http://marc.theaimsgroup.com/?l=linux-kernel&m=107269078726254&w=2 Apart from a few bug fixes, the main change in this version is for symlinks. sysfs_create_link() now does not create a dentry/inode for the link, but allocates a sysfs_dirent and adds it to the parent sysfs_dirent's s_children list. The dentry/inode for the link is created when the symlink is first looked up. I request Martin and Mackall to _replace_ the old patch set with the new one in their trees. With this we save approx 75-80% of the lowmem requirements of sysfs. Symlink support is needed for S390 linux people as they create lots of symlinks. I have some numbers collected on an S390 machine after creating 4096 ctc devices. This created around 62433 sysfs entries. The backing store saves around 145 MB of Lowmem when sysfs files are not in use.
--------------------------------------------------------------------------- Without sysfs backing store # cat /proc/sys/fs/dentry-state 66733 4100 45 0 0 0 # grep LowFree /proc/meminfo LowFree: 20984 kB # grep dentry /proc/slabinfo; grep -w inode_cache /proc/slabinfo dentry_cache 66750 66750 256 15 1 : tunables 120 60 8 : slabdata 4450 4450 0 inode_cache 62480 62480 768 5 1 : tunables 54 27 8 : slabdata 12496 12496 0 --------------------------------------------------------------------------- --------------------------------------------------------------------------- With sysfs backing store # cat /proc/sys/fs/dentry-state 8783 155 45 0 0 0 # grep LowFree /proc/meminfo LowFree: 166416 kB # grep dentry /proc/slabinfo; grep -w inode_cache /proc/slabinfo dentry_cache 8795 8880 256 15 1 : tunables 120 60 8 : slabdata 592 592 0 inode_cache 8495 8495 768 5 1 : tunables 54 27 8 : slabdata 1699 1699 0 **** Savings of around 145 MB of Lowmem ***** --------------------------------------------------------------------------- Thanks Maneesh diff -Naur linux-2.6.2.orig/fs/sysfs/bin.c linux-2.6.2/fs/sysfs/bin.c --- linux-2.6.2.orig/fs/sysfs/bin.c 2004-02-03 22:43:48.000000000 -0500 +++ linux-2.6.2/fs/sysfs/bin.c 2004-02-04 17:04:42.000000000 -0500 @@ -17,8 +17,10 @@ static int fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) { - struct bin_attribute * attr = dentry->d_fsdata; - struct kobject * kobj = dentry->d_parent->d_fsdata; + struct sysfs_dirent * sd_attr = dentry->d_fsdata; + struct bin_attribute * attr = sd_attr->s_element; + struct sysfs_dirent * sd_kobj = dentry->d_parent->d_fsdata; + struct kobject * kobj = sd_kobj->s_element; return attr->read(kobj, buffer, off, count); } @@ -60,8 +62,10 @@ static int flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) { - struct bin_attribute *attr = dentry->d_fsdata; - struct kobject *kobj = dentry->d_parent->d_fsdata; + struct sysfs_dirent * sd_attr = dentry->d_fsdata; + struct 
bin_attribute * attr = sd_attr->s_element; + struct sysfs_dirent * sd_kobj = dentry->d_parent->d_fsdata; + struct kobject * kobj = sd_kobj->s_element; return attr->write(kobj, buffer, offset, count); } @@ -94,8 +98,10 @@ static int open(struct inode * inode, struct file * file) { - struct kobject * kobj = kobject_get(file->f_dentry->d_parent->d_fsdata); - struct bin_attribute * attr = file->f_dentry->d_fsdata; + struct sysfs_dirent * sd_kobj = file->f_dentry->d_parent->d_fsdata; + struct kobject * kobj = kobject_get(sd_kobj->s_element); + struct sysfs_dirent * sd_attr = file->f_dentry->d_fsdata; + struct bin_attribute * attr = sd_attr->s_element; int error = -EINVAL; if (!kobj || !attr) @@ -122,7 +128,8 @@ static int release(struct inode * inode, struct file * file) { - struct kobject * kobj = file->f_dentry->d_parent->d_fsdata; + struct sysfs_dirent * sd = file->f_dentry->d_parent->d_fsdata; + struct kobject * kobj = sd->s_element; u8 * buffer = file->private_data; if (kobj) @@ -131,7 +138,7 @@ return 0; } -static struct file_operations bin_fops = { +struct file_operations bin_fops = { .read = read, .write = write, .llseek = generic_file_llseek, @@ -148,31 +155,10 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - struct dentry * dentry; - struct dentry * parent; - int error = 0; - - if (!kobj || !attr) - return -EINVAL; - - parent = kobj->dentry; - - down(&parent->d_inode->i_sem); - dentry = sysfs_get_dentry(parent,attr->attr.name); - if (!IS_ERR(dentry)) { - dentry->d_fsdata = (void *)attr; - error = sysfs_create(dentry, - (attr->attr.mode & S_IALLUGO) | S_IFREG, - NULL); - if (!error) { - dentry->d_inode->i_size = attr->size; - dentry->d_inode->i_fop = &bin_fops; - } - dput(dentry); - } else - error = PTR_ERR(dentry); - up(&parent->d_inode->i_sem); - return error; + if (kobj && kobj->dentry && attr) + return sysfs_add_file(kobj->dentry, &attr->attr, + SYSFS_KOBJ_BIN_ATTR); + return -EINVAL; } diff -Naur 
linux-2.6.2.orig/fs/sysfs/dir.c linux-2.6.2/fs/sysfs/dir.c --- linux-2.6.2.orig/fs/sysfs/dir.c 2004-02-03 22:43:43.000000000 -0500 +++ linux-2.6.2/fs/sysfs/dir.c 2004-02-04 17:04:38.000000000 -0500 @@ -10,17 +10,151 @@ #include #include "sysfs.h" +struct inode_operations sysfs_dir_inode_operations = { + .lookup = sysfs_lookup, +}; + +struct file_operations sysfs_dir_operations = { + .open = dcache_dir_open, + .release = dcache_dir_close, + .llseek = sysfs_dir_lseek, + .read = generic_read_dir, + .readdir = sysfs_readdir, +}; + +static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) +{ + struct sysfs_dirent * sd = dentry->d_fsdata; + + if (sd) { + sd->s_dentry = NULL; + if ((sd->s_type & SYSFS_KOBJECT) || + (sd->s_type & SYSFS_KOBJ_ATTR_GROUP)) + kobject_put(sd->s_element); + sysfs_put(sd); + } + iput(inode); +} + +static struct dentry_operations sysfs_dentry_ops = { + .d_iput = sysfs_d_iput, +}; + +char * sysfs_get_name(struct sysfs_dirent *sd) +{ + struct attribute * attr; + struct bin_attribute * bin_attr; + char ** link_names; + + if (!sd || !sd->s_element) + BUG(); + + switch (sd->s_type) { + case SYSFS_KOBJ_ATTR: + attr = sd->s_element; + return attr->name; + + case SYSFS_KOBJ_BIN_ATTR: + bin_attr = sd->s_element; + return bin_attr->attr.name; + + case SYSFS_KOBJ_LINK: + link_names = sd->s_element; + return link_names[0]; + } + return NULL; +} + +static int init_file(struct inode * inode) +{ + inode->i_size = PAGE_SIZE; + inode->i_fop = &sysfs_file_operations; + return 0; +} + static int init_dir(struct inode * inode) { - inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_op = &sysfs_dir_inode_operations; + inode->i_fop = &sysfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." 
entry) */ inode->i_nlink++; return 0; } +/* attaches attribute's sysfs_dirent to the dentry corresponding to the + * attribute file + */ +static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry) +{ + struct attribute * attr = NULL; + struct bin_attribute * bin_attr = NULL; + int (* init) (struct inode *) = NULL; + int error = 0; + if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) { + bin_attr = sd->s_element; + attr = &bin_attr->attr; + } else { + attr = sd->s_element; + init = init_file; + } + + error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init); + if (error) + return error; + + if (bin_attr) { + dentry->d_inode->i_size = bin_attr->size; + dentry->d_inode->i_fop = &bin_fops; + } + dentry->d_op = &sysfs_dentry_ops; + dentry->d_fsdata = sysfs_get(sd); + sd->s_dentry = dentry; + d_rehash(dentry); + + return 0; +} + +static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry) +{ + struct inode * dir = dentry->d_parent->d_inode; + char ** link_names = sd->s_element; + int err = 0; + + err = sysfs_symlink(dir, dentry, link_names[1]); + if (!err) { + dentry->d_op = &sysfs_dentry_ops; + dentry->d_fsdata = sysfs_get(sd); + sd->s_dentry = dentry; + d_rehash(dentry); + } + return err; +} + +struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; + struct sysfs_dirent * sd; + int err = 0; + + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { + if (sd->s_type & SYSFS_NOT_PINNED) { + char * name = sysfs_get_name(sd); + if (strcmp(name, dentry->d_name.name)) + continue; + if (sd->s_type & SYSFS_KOBJ_LINK) + err = sysfs_attach_link(sd, dentry); + else + err = sysfs_attach_attr(sd, dentry); + break; + } + } + + return ERR_PTR(err); +} + static int create_dir(struct kobject * k, struct dentry * p, const char * n, struct dentry ** d) { @@ -33,8 +167,21 @@ S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO, init_dir); if (!error) { - 
(*d)->d_fsdata = k; - p->d_inode->i_nlink++; + struct sysfs_dirent * sd, * parent_sd; + parent_sd = p->d_fsdata; + sd = sysfs_new_dirent(parent_sd, k, + (parent_sd->s_element == k) ? + SYSFS_KOBJ_ATTR_GROUP : + SYSFS_KOBJECT); + if (sd) { + (*d)->d_fsdata = sysfs_get(sd); + (*d)->d_op = &sysfs_dentry_ops; + p->d_inode->i_nlink++; + sd->s_element = kobject_get(k); + sd->s_dentry = *d; + d_rehash(*d); + } else + error = -ENOMEM; } dput(*d); } else @@ -43,7 +190,6 @@ return error; } - int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d) { return create_dir(k,k->dentry,n,d); @@ -81,8 +227,13 @@ static void remove_dir(struct dentry * d) { struct dentry * parent = dget(d->d_parent); + struct sysfs_dirent * sd; + down(&parent->d_inode->i_sem); d_delete(d); + sd = d->d_fsdata; + list_del_init(&sd->s_sibling); + sysfs_put(d->d_fsdata); if (d->d_inode) simple_rmdir(parent->d_inode,d); @@ -96,6 +247,8 @@ void sysfs_remove_subdir(struct dentry * d) { remove_dir(d); + /* Needed to release the pinned attr. group subdirectory */ + dput(d); } @@ -110,46 +263,32 @@ void sysfs_remove_dir(struct kobject * kobj) { - struct list_head * node; struct dentry * dentry = dget(kobj->dentry); + struct dentry * d; + struct sysfs_dirent * parent_sd = dentry->d_fsdata; + struct sysfs_dirent * sd, * tmp; + char * name; if (!dentry) return; pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); down(&dentry->d_inode->i_sem); - - spin_lock(&dcache_lock); - node = dentry->d_subdirs.next; - while (node != &dentry->d_subdirs) { - struct dentry * d = list_entry(node,struct dentry,d_child); - list_del_init(node); - - pr_debug(" o %s (%d): ",d->d_name.name,atomic_read(&d->d_count)); - if (d->d_inode) { - d = dget_locked(d); - pr_debug("removing"); - - /** - * Unlink and unhash. 
- */ - spin_unlock(&dcache_lock); - d_delete(d); - simple_unlink(dentry->d_inode,d); - dput(d); - spin_lock(&dcache_lock); + list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { + if (sd->s_type & SYSFS_NOT_PINNED) { + name = sysfs_get_name(sd); + d = sysfs_get_dentry(dentry, name); + if (!IS_ERR(d) && d->d_inode) { + list_del_init(&sd->s_sibling); + sysfs_put(sd); + d_drop(d); + simple_unlink(dentry->d_inode, d); + } } - pr_debug(" done\n"); - node = dentry->d_subdirs.next; } - list_del_init(&dentry->d_child); - spin_unlock(&dcache_lock); up(&dentry->d_inode->i_sem); remove_dir(dentry); - /** - * Drop reference from dget() on entrance. - */ dput(dentry); } @@ -173,6 +312,166 @@ up(&parent->d_inode->i_sem); } +/* called under parent inode's i_sem */ +static void sysfs_close_dir_entries(struct dentry * parent) +{ + struct sysfs_dirent * parent_sd = parent->d_fsdata; + struct sysfs_dirent * sd, * tmp; + + list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { + if (sd->s_type & SYSFS_NOT_PINNED) { + struct dentry * dentry = sd->s_dentry; + if (dentry && dentry->d_inode) + dput(dentry); + } + } +} + +/* called under parent inode's i_sem */ +static int sysfs_open_dir_entries(struct dentry * parent) +{ + struct sysfs_dirent * parent_sd = parent->d_fsdata; + struct sysfs_dirent * sd; + struct dentry * dentry; + int error = 0; + + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { + if (sd->s_type & SYSFS_NOT_PINNED) { + char * name = sysfs_get_name(sd); + dentry = sysfs_get_dentry(parent, name); + if (IS_ERR(dentry)) + error = PTR_ERR(dentry); + if (dentry->d_fsdata != sd) { + if (sd->s_type & SYSFS_KOBJ_LINK) + error = sysfs_attach_link(sd, dentry); + else + error = sysfs_attach_attr(sd, dentry); + } + if (error) + break; + } + } + if (error) { + /* release all successfully opened entires so far*/ + sysfs_close_dir_entries(parent); + } + + return error; +} + +/* Relationship between i_mode and the DT_xxx types */ +static 
inline unsigned char dt_type(struct inode *inode) +{ + return (inode->i_mode >> 12) & 15; +} + +/* + * Directory is locked and all positive dentries in it are safe, since + * for ramfs-type trees they can't go away without unlink() or rmdir(), + * both impossible due to the lock on directory. + */ + +int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_dentry; + struct dentry *cursor = filp->private_data; + struct list_head *p, *q = &cursor->d_child; + ino_t ino; + int i = filp->f_pos; + int err = 0; + + switch (i) { + case 0: + ino = dentry->d_inode->i_ino; + if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) + break; + filp->f_pos++; + i++; + /* fallthrough */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) + break; + filp->f_pos++; + i++; + /* fallthrough */ + default: + if ((err = sysfs_open_dir_entries(dentry))) + return err; + + spin_lock(&dcache_lock); + if (filp->f_pos == 2) { + list_del(q); + list_add(q, &dentry->d_subdirs); + } + for (p=q->next; p != &dentry->d_subdirs; p=p->next) { + struct dentry *next; + next = list_entry(p, struct dentry, d_child); + if (d_unhashed(next) || !next->d_inode) + continue; + + spin_unlock(&dcache_lock); + if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) + return 0; + spin_lock(&dcache_lock); + /* next is still alive */ + list_del(q); + list_add(q, p); + p = q; + filp->f_pos++; + } + spin_unlock(&dcache_lock); + sysfs_close_dir_entries(dentry); + } + return 0; +} + +loff_t sysfs_dir_lseek(struct file *file, loff_t offset, int origin) +{ + int err = 0; + + down(&file->f_dentry->d_inode->i_sem); + switch (origin) { + case 1: + offset += file->f_pos; + case 0: + if (offset >= 0) + break; + default: + up(&file->f_dentry->d_inode->i_sem); + return -EINVAL; + } + if (offset != file->f_pos) { + file->f_pos = offset; + if (file->f_pos >= 2) { + struct list_head *p; + 
struct dentry *cursor = file->private_data; + loff_t n = file->f_pos - 2; + + if ((err = sysfs_open_dir_entries(file->f_dentry))) { + offset = err; + goto exit; + } + + spin_lock(&dcache_lock); + list_del(&cursor->d_child); + p = file->f_dentry->d_subdirs.next; + while (n && p != &file->f_dentry->d_subdirs) { + struct dentry *next; + next = list_entry(p, struct dentry, d_child); + if (!d_unhashed(next) && next->d_inode) + n--; + p = p->next; + } + list_add_tail(&cursor->d_child, p); + spin_unlock(&dcache_lock); + sysfs_close_dir_entries(file->f_dentry); + } + } +exit: + up(&file->f_dentry->d_inode->i_sem); + return offset; +} EXPORT_SYMBOL(sysfs_create_dir); EXPORT_SYMBOL(sysfs_remove_dir); EXPORT_SYMBOL(sysfs_rename_dir); diff -Naur linux-2.6.2.orig/fs/sysfs/file.c linux-2.6.2/fs/sysfs/file.c --- linux-2.6.2.orig/fs/sysfs/file.c 2004-02-03 22:43:07.000000000 -0500 +++ linux-2.6.2/fs/sysfs/file.c 2004-02-04 17:04:40.000000000 -0500 @@ -9,14 +9,6 @@ #include "sysfs.h" -static struct file_operations sysfs_file_operations; - -static int init_file(struct inode * inode) -{ - inode->i_size = PAGE_SIZE; - inode->i_fop = &sysfs_file_operations; - return 0; -} #define to_subsys(k) container_of(k,struct subsystem,kset.kobj) #define to_sattr(a) container_of(a,struct subsys_attribute,attr) @@ -77,8 +69,10 @@ */ static int fill_read_buffer(struct file * file, struct sysfs_buffer * buffer) { - struct attribute * attr = file->f_dentry->d_fsdata; - struct kobject * kobj = file->f_dentry->d_parent->d_fsdata; + struct sysfs_dirent * sd_attr = file->f_dentry->d_fsdata; + struct attribute * attr = sd_attr->s_element; + struct sysfs_dirent * sd_kobj = file->f_dentry->d_parent->d_fsdata; + struct kobject * kobj = sd_kobj->s_element; struct sysfs_ops * ops = buffer->ops; int ret = 0; ssize_t count; @@ -198,8 +192,10 @@ static int flush_write_buffer(struct file * file, struct sysfs_buffer * buffer, size_t count) { - struct attribute * attr = file->f_dentry->d_fsdata; - struct kobject * 
kobj = file->f_dentry->d_parent->d_fsdata; + struct sysfs_dirent * sd_attr = file->f_dentry->d_fsdata; + struct attribute * attr = sd_attr->s_element; + struct sysfs_dirent * sd_kobj = file->f_dentry->d_parent->d_fsdata; + struct kobject * kobj = sd_kobj->s_element; struct sysfs_ops * ops = buffer->ops; return ops->store(kobj,attr,buffer->page,count); @@ -238,8 +234,10 @@ static int check_perm(struct inode * inode, struct file * file) { - struct kobject * kobj = kobject_get(file->f_dentry->d_parent->d_fsdata); - struct attribute * attr = file->f_dentry->d_fsdata; + struct sysfs_dirent * sd_attr = file->f_dentry->d_fsdata; + struct attribute * attr = sd_attr->s_element; + struct sysfs_dirent * sd_kobj = file->f_dentry->d_parent->d_fsdata; + struct kobject * kobj = kobject_get(sd_kobj->s_element); struct sysfs_buffer * buffer; struct sysfs_ops * ops = NULL; int error = 0; @@ -320,8 +318,10 @@ static int sysfs_release(struct inode * inode, struct file * filp) { - struct kobject * kobj = filp->f_dentry->d_parent->d_fsdata; - struct attribute * attr = filp->f_dentry->d_fsdata; + struct sysfs_dirent * sd_attr = filp->f_dentry->d_fsdata; + struct attribute * attr = sd_attr->s_element; + struct sysfs_dirent * sd_kobj = filp->f_dentry->d_parent->d_fsdata; + struct kobject * kobj = sd_kobj->s_element; struct sysfs_buffer * buffer = filp->private_data; if (kobj) @@ -336,7 +336,7 @@ return 0; } -static struct file_operations sysfs_file_operations = { +struct file_operations sysfs_file_operations = { .read = sysfs_read_file, .write = sysfs_write_file, .llseek = generic_file_llseek, @@ -345,23 +345,18 @@ }; -int sysfs_add_file(struct dentry * dir, const struct attribute * attr) +int sysfs_add_file(struct dentry * parent, const struct attribute * attr, int t) { - struct dentry * dentry; - int error; + struct sysfs_dirent * sd; + struct sysfs_dirent * parent_sd = parent->d_fsdata; + int error = 0; - down(&dir->d_inode->i_sem); - dentry = sysfs_get_dentry(dir,attr->name); - if 
(!IS_ERR(dentry)) { - error = sysfs_create(dentry, - (attr->mode & S_IALLUGO) | S_IFREG, - init_file); - if (!error) - dentry->d_fsdata = (void *)attr; - dput(dentry); - } else - error = PTR_ERR(dentry); - up(&dir->d_inode->i_sem); + down(&parent->d_inode->i_sem); + sd = sysfs_new_dirent(parent_sd, (void *) attr, t); + if (!sd) + error = -ENOMEM; + up(&parent->d_inode->i_sem); + return error; } @@ -374,8 +369,8 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) { - if (kobj && attr) - return sysfs_add_file(kobj->dentry,attr); + if (kobj && kobj->dentry && attr) + return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR); return -EINVAL; } diff -Naur linux-2.6.2.orig/fs/sysfs/group.c linux-2.6.2/fs/sysfs/group.c --- linux-2.6.2.orig/fs/sysfs/group.c 2004-02-03 22:43:56.000000000 -0500 +++ linux-2.6.2/fs/sysfs/group.c 2004-02-04 17:04:46.000000000 -0500 @@ -31,7 +31,7 @@ int error = 0; for (attr = grp->attrs; *attr && !error; attr++) { - error = sysfs_add_file(dir,*attr); + error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR); } if (error) remove_files(dir,grp); @@ -68,12 +68,12 @@ if (grp->name) dir = sysfs_get_dentry(kobj->dentry,grp->name); else - dir = kobj->dentry; + dir = dget(kobj->dentry); remove_files(dir,grp); - dput(dir); if (grp->name) sysfs_remove_subdir(dir); + dput(dir); } diff -Naur linux-2.6.2.orig/fs/sysfs/inode.c linux-2.6.2/fs/sysfs/inode.c --- linux-2.6.2.orig/fs/sysfs/inode.c 2004-02-03 22:44:26.000000000 -0500 +++ linux-2.6.2/fs/sysfs/inode.c 2004-02-04 17:04:40.000000000 -0500 @@ -11,6 +11,8 @@ #include #include #include +#include "sysfs.h" + extern struct super_block * sysfs_sb; static struct address_space_operations sysfs_aops = { @@ -61,7 +63,8 @@ error = init(inode); if (!error) { d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ + if (S_ISDIR(mode)) + dget(dentry); /* pin only directory dentry in core */ } else iput(inode); Done: @@ -96,14 +99,19 @@ pr_debug("sysfs: 
Removing %s (%d)\n", victim->d_name.name, atomic_read(&victim->d_count)); - d_delete(victim); - simple_unlink(dir->d_inode,victim); + if (S_ISDIR(victim->d_inode->i_mode)) { + d_delete(victim); + simple_unlink(dir->d_inode,victim); + } + else + d_drop(victim); } /* * Drop reference from sysfs_get_dentry() above. */ dput(victim); } + sysfs_remove_dirent(dir->d_fsdata, name); up(&dir->d_inode->i_sem); } diff -Naur linux-2.6.2.orig/fs/sysfs/mount.c linux-2.6.2/fs/sysfs/mount.c --- linux-2.6.2.orig/fs/sysfs/mount.c 2004-02-03 22:44:27.000000000 -0500 +++ linux-2.6.2/fs/sysfs/mount.c 2004-02-04 17:04:34.000000000 -0500 @@ -20,6 +20,14 @@ static struct super_operations sysfs_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, + .umount_begin = sysfs_umount_begin, +}; + +struct sysfs_dirent sysfs_root = { + .s_sibling = LIST_HEAD_INIT(sysfs_root.s_sibling), + .s_children = LIST_HEAD_INIT(sysfs_root.s_children), + .s_element = NULL, + .s_type = SYSFS_ROOT, }; static int sysfs_fill_super(struct super_block *sb, void *data, int silent) @@ -35,8 +43,8 @@ inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); if (inode) { - inode->i_op = &simple_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_op = &sysfs_dir_inode_operations; + inode->i_fop = &sysfs_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inode->i_nlink++; } else { @@ -50,6 +58,7 @@ iput(inode); return -ENOMEM; } + root->d_fsdata = &sysfs_root; sb->s_root = root; return 0; } @@ -60,6 +69,21 @@ return get_sb_single(fs_type, flags, data, sysfs_fill_super); } +/* For freeing zero refenced dentries / inodes while force unmounting + * + * Because still sysfs has dentries corresponding to the directories pinned + * in memory which keeps the super block always active and ->kill_sb is never + * called while unmounting, in order to free the memory used by un-used + * dentries and inodes sysfs has to be "force" umounted. 
+ */ +void sysfs_umount_begin(struct super_block * sb) +{ + lock_super(sb); + if (sb->s_root) + shrink_dcache_parent(sb->s_root); + unlock_super(sb); +} + static struct file_system_type sysfs_fs_type = { .name = "sysfs", .get_sb = sysfs_get_sb, diff -Naur linux-2.6.2.orig/fs/sysfs/symlink.c linux-2.6.2/fs/sysfs/symlink.c --- linux-2.6.2.orig/fs/sysfs/symlink.c 2004-02-03 22:43:45.000000000 -0500 +++ linux-2.6.2/fs/sysfs/symlink.c 2004-02-04 17:04:43.000000000 -0500 @@ -15,7 +15,7 @@ return 0; } -static int sysfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +int sysfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) { int error; @@ -63,6 +63,27 @@ } } +static int sysfs_add_link(struct sysfs_dirent * parent_sd, char * name, char * target) +{ + struct sysfs_dirent * sd; + char ** link_names; + + link_names = kmalloc(sizeof(char *) * 2, GFP_KERNEL); + if (!link_names) + return -ENOMEM; + + link_names[0] = name; + link_names[1] = target; + + sd = sysfs_new_dirent(parent_sd, link_names, SYSFS_KOBJ_LINK); + if (!sd) { + kfree(link_names); + return -ENOMEM; + } + + return 0; +} + /** * sysfs_create_link - create symlink between two objects. * @kobj: object whose directory we're creating the link in. 
@@ -72,7 +93,6 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, char * name) { struct dentry * dentry = kobj->dentry; - struct dentry * d; int error = 0; int size; int depth; @@ -97,14 +117,10 @@ pr_debug("%s: path = '%s'\n",__FUNCTION__,path); down(&dentry->d_inode->i_sem); - d = sysfs_get_dentry(dentry,name); - if (!IS_ERR(d)) - error = sysfs_symlink(dentry->d_inode,d,path); - else - error = PTR_ERR(d); - dput(d); + error = sysfs_add_link(dentry->d_fsdata, name, path); up(&dentry->d_inode->i_sem); - kfree(path); + if (error) + kfree(path); return error; } diff -Naur linux-2.6.2.orig/fs/sysfs/sysfs.h linux-2.6.2/fs/sysfs/sysfs.h --- linux-2.6.2.orig/fs/sysfs/sysfs.h 2004-02-03 22:44:20.000000000 -0500 +++ linux-2.6.2/fs/sysfs/sysfs.h 2004-02-04 17:04:46.000000000 -0500 @@ -1,4 +1,5 @@ +#include extern struct vfsmount * sysfs_mount; extern struct inode * sysfs_new_inode(mode_t mode); @@ -6,8 +7,81 @@ extern struct dentry * sysfs_get_dentry(struct dentry *, const char *); -extern int sysfs_add_file(struct dentry * dir, const struct attribute * attr); +extern int sysfs_add_file(struct dentry *, const struct attribute *, int); extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); extern void sysfs_remove_subdir(struct dentry *); + +extern loff_t sysfs_dir_lseek(struct file *, loff_t, int); +extern int sysfs_readdir(struct file *, void *, filldir_t); +extern void sysfs_umount_begin(struct super_block *); +extern char * sysfs_get_name(struct sysfs_dirent *); +extern struct dentry * sysfs_lookup(struct inode *, struct dentry *, struct nameidata *); +extern int sysfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname); + +extern struct file_operations sysfs_file_operations; +extern struct file_operations bin_fops; +extern struct inode_operations sysfs_dir_inode_operations; +extern struct file_operations sysfs_dir_operations; + 
+ +static inline +struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * p, void * e, int t) +{ + struct sysfs_dirent * sd; + + sd = kmalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) + return NULL; + memset(sd, 0, sizeof(*sd)); + atomic_set(&sd->s_count, 1); + sd->s_element = e; + sd->s_type = t; + sd->s_dentry = NULL; + INIT_LIST_HEAD(&sd->s_children); + list_add(&sd->s_sibling, &p->s_children); + + return sd; +} + +static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) +{ + if (sd) { + WARN_ON(!atomic_read(&sd->s_count)); + atomic_inc(&sd->s_count); + } + return sd; +} + +static inline void sysfs_put(struct sysfs_dirent * sd) +{ + if (atomic_dec_and_test(&sd->s_count)) { + if (sd->s_type & SYSFS_KOBJ_LINK) { + char ** link_names = sd->s_element; + kfree(link_names[1]); + kfree(sd->s_element); + } + kfree(sd); + } +} + +static inline +void sysfs_remove_dirent(struct sysfs_dirent * parent_sd, const char * name) +{ + struct list_head * tmp; + + tmp = parent_sd->s_children.next; + while (tmp != & parent_sd->s_children) { + struct sysfs_dirent * sd; + sd = list_entry(tmp, struct sysfs_dirent, s_sibling); + tmp = tmp->next; + if (sd->s_type & SYSFS_NOT_PINNED) { + if (!strcmp(sysfs_get_name(sd), name)) { + list_del_init(&sd->s_sibling); + sysfs_put(sd); + } + } + } +} + diff -Naur linux-2.6.2.orig/include/linux/sysfs.h linux-2.6.2/include/linux/sysfs.h --- linux-2.6.2.orig/include/linux/sysfs.h 2004-02-03 22:43:42.000000000 -0500 +++ linux-2.6.2/include/linux/sysfs.h 2004-02-04 17:04:34.000000000 -0500 @@ -9,6 +9,8 @@ #ifndef _SYSFS_H_ #define _SYSFS_H_ +#include + struct kobject; struct module; @@ -42,6 +44,23 @@ extern void sysfs_rename_dir(struct kobject *, const char *new_name); +struct sysfs_dirent { + atomic_t s_count; + struct list_head s_sibling; + struct list_head s_children; + void * s_element; + int s_type; + struct dentry * s_dentry; +}; + +#define SYSFS_ROOT 0x0001 +#define SYSFS_KOBJECT 0x0002 +#define SYSFS_KOBJ_ATTR 0x0004 +#define 
SYSFS_KOBJ_BIN_ATTR 0x0008 +#define SYSFS_KOBJ_ATTR_GROUP 0x0010 +#define SYSFS_KOBJ_LINK 0x0020 +#define SYSFS_NOT_PINNED (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK) + extern int sysfs_create_file(struct kobject *, const struct attribute *);